1 /* Copyright (C) 2007-2024 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file handles the maintenance of tasks in response to task
27 creation and termination. */
33 #include "gomp-constants.h"
35 typedef struct gomp_task_depend_entry
*hash_entry_type
;
38 htab_alloc (size_t size
)
40 return gomp_malloc (size
);
51 static inline hashval_t
52 htab_hash (hash_entry_type element
)
54 return hash_pointer (element
->addr
);
58 htab_eq (hash_entry_type x
, hash_entry_type y
)
60 return x
->addr
== y
->addr
;
63 /* Create a new task data structure. */
66 gomp_init_task (struct gomp_task
*task
, struct gomp_task
*parent_task
,
67 struct gomp_task_icv
*prev_icv
)
69 /* It would seem that using memset here would be a win, but it turns
70 out that partially filling gomp_task allows us to keep the
71 overhead of task creation low. In the nqueens-1.c test, for a
72 sufficiently large N, we drop the overhead from 5-6% to 1%.
74 Note, the nqueens-1.c test in serial mode is a good test to
75 benchmark the overhead of creating tasks as there are millions of
76 tiny tasks created that all run undeferred. */
77 task
->parent
= parent_task
;
78 priority_queue_init (&task
->children_queue
);
79 task
->taskgroup
= NULL
;
80 task
->dependers
= NULL
;
81 task
->depend_hash
= NULL
;
82 task
->taskwait
= NULL
;
83 task
->depend_all_memory
= NULL
;
84 task
->depend_count
= 0;
85 task
->completion_sem
= NULL
;
86 task
->deferred_p
= false;
87 task
->icv
= *prev_icv
;
88 task
->kind
= GOMP_TASK_IMPLICIT
;
89 task
->in_tied_task
= false;
90 task
->final_task
= false;
91 task
->copy_ctors_done
= false;
92 task
->parent_depends_on
= false;
95 /* Clean up a task, after completing it. */
100 struct gomp_thread
*thr
= gomp_thread ();
101 struct gomp_task
*task
= thr
->task
;
103 gomp_finish_task (task
);
104 thr
->task
= task
->parent
;
107 /* Clear the parent field of every task in LIST. */
110 gomp_clear_parent_in_list (struct priority_list
*list
)
112 struct priority_node
*p
= list
->tasks
;
116 priority_node_to_task (PQ_CHILDREN
, p
)->parent
= NULL
;
119 while (p
!= list
->tasks
);
122 /* Splay tree version of gomp_clear_parent_in_list.
124 Clear the parent field of every task in NODE within SP, and free
125 the node when done. */
128 gomp_clear_parent_in_tree (prio_splay_tree sp
, prio_splay_tree_node node
)
132 prio_splay_tree_node left
= node
->left
, right
= node
->right
;
133 gomp_clear_parent_in_list (&node
->key
.l
);
134 #if _LIBGOMP_CHECKING_
135 memset (node
, 0xaf, sizeof (*node
));
137 /* No need to remove the node from the tree. We're nuking
138 everything, so just free the nodes and our caller can clear the
139 entire splay tree. */
141 gomp_clear_parent_in_tree (sp
, left
);
142 gomp_clear_parent_in_tree (sp
, right
);
145 /* Clear the parent field of every task in Q and remove every task
149 gomp_clear_parent (struct priority_queue
*q
)
151 if (priority_queue_multi_p (q
))
153 gomp_clear_parent_in_tree (&q
->t
, q
->t
.root
);
154 /* All the nodes have been cleared in gomp_clear_parent_in_tree.
155 No need to remove anything. We can just nuke everything. */
159 gomp_clear_parent_in_list (&q
->l
);
162 /* Helper function for GOMP_task and gomp_create_target_task.
164 For a TASK with in/out dependencies, fill in the various dependency
165 queues. PARENT is the parent of said task. DEPEND is as in GOMP_task. */
169 gomp_task_handle_depend (struct gomp_task
*task
, struct gomp_task
*parent
,
172 size_t ndepend
= (uintptr_t) depend
[0];
175 bool all_memory
= false;
179 /* depend[0] is total # */
180 size_t nout
= (uintptr_t) depend
[1]; /* # of out: and inout: */
181 /* ndepend - nout is # of in: */
182 for (i
= 0; i
< ndepend
; i
++)
184 task
->depend
[i
].addr
= depend
[2 + i
];
185 task
->depend
[i
].is_in
= i
>= nout
;
186 all_memory
|= i
< nout
&& depend
[2 + i
] == NULL
;
191 ndepend
= (uintptr_t) depend
[1]; /* total # */
192 size_t nout
= (uintptr_t) depend
[2]; /* # of out: and inout: */
193 size_t nmutexinoutset
= (uintptr_t) depend
[3]; /* # of mutexinoutset: */
194 /* For now we treat mutexinoutset like out, which is compliant, but
196 size_t nin
= (uintptr_t) depend
[4]; /* # of in: */
197 /* ndepend - nout - nmutexinoutset - nin is # of depobjs */
198 size_t normal
= nout
+ nmutexinoutset
+ nin
;
201 for (i
= normal
; i
< ndepend
; i
++)
203 void **d
= (void **) (uintptr_t) depend
[5 + i
];
204 switch ((uintptr_t) d
[1])
206 case GOMP_DEPEND_OUT
:
207 case GOMP_DEPEND_INOUT
:
208 all_memory
|= d
[0] == NULL
;
210 case GOMP_DEPEND_MUTEXINOUTSET
:
213 case GOMP_DEPEND_INOUTSET
:
217 gomp_fatal ("unknown omp_depend_t dependence type %d",
218 (int) (uintptr_t) d
[1]);
220 task
->depend
[n
].addr
= d
[0];
221 task
->depend
[n
++].is_in
= 0;
223 for (i
= 0; i
< normal
; i
++)
225 task
->depend
[n
].addr
= depend
[5 + i
];
226 task
->depend
[n
++].is_in
= i
>= nout
+ nmutexinoutset
;
229 for (i
= normal
; i
< ndepend
; i
++)
231 void **d
= (void **) (uintptr_t) depend
[5 + i
];
232 if ((uintptr_t) d
[1] != GOMP_DEPEND_IN
233 && (uintptr_t) d
[1] != GOMP_DEPEND_INOUTSET
)
235 task
->depend
[n
].addr
= d
[0];
236 task
->depend
[n
++].is_in
237 = 1 + ((uintptr_t) d
[1] == GOMP_DEPEND_INOUTSET
);
240 task
->num_dependees
= 0;
241 if (__builtin_expect (parent
->depend_all_memory
&& ndepend
, false))
243 struct gomp_task
*tsk
= parent
->depend_all_memory
;
244 if (tsk
->dependers
== NULL
)
247 = gomp_malloc (sizeof (struct gomp_dependers_vec
)
248 + 6 * sizeof (struct gomp_task
*));
249 tsk
->dependers
->n_elem
= 1;
250 tsk
->dependers
->allocated
= 6;
251 tsk
->dependers
->elem
[0] = task
;
255 if (tsk
->dependers
->n_elem
== tsk
->dependers
->allocated
)
257 tsk
->dependers
->allocated
258 = tsk
->dependers
->allocated
* 2 + 2;
260 = gomp_realloc (tsk
->dependers
,
261 sizeof (struct gomp_dependers_vec
)
262 + (tsk
->dependers
->allocated
263 * sizeof (struct gomp_task
*)));
265 tsk
->dependers
->elem
[tsk
->dependers
->n_elem
++] = task
;
267 task
->num_dependees
++;
269 if (__builtin_expect (all_memory
, false))
271 /* A task with depend(inout: omp_all_memory) depends on all previous
272 sibling tasks which have any dependencies and all later sibling
273 tasks which have any dependencies depend on it. */
274 task
->depend_count
= 1;
275 task
->depend
[0].addr
= NULL
;
276 task
->depend
[0].next
= NULL
;
277 task
->depend
[0].prev
= NULL
;
278 task
->depend
[0].task
= task
;
279 task
->depend
[0].redundant
= true;
280 task
->depend
[0].redundant_out
= false;
281 if (parent
->depend_hash
)
283 /* Inlined htab_traverse + htab_clear. All newer siblings can
284 just depend on this task. Add dependencies on all previous
285 sibling tasks with dependencies and make them redundant and
286 clear the hash table. */
287 hash_entry_type
*slot
= &parent
->depend_hash
->entries
[0];
288 hash_entry_type
*end
= slot
+ htab_size (parent
->depend_hash
);
289 for (; slot
!= end
; ++slot
)
291 if (*slot
== HTAB_EMPTY_ENTRY
)
293 if (*slot
!= HTAB_DELETED_ENTRY
)
295 for (ent
= *slot
; ent
; ent
= ent
->next
)
297 struct gomp_task
*tsk
= ent
->task
;
299 if (ent
->redundant_out
)
302 ent
->redundant
= true;
303 if (tsk
->dependers
== NULL
)
306 = gomp_malloc (sizeof (struct gomp_dependers_vec
)
307 + 6 * sizeof (struct gomp_task
*));
308 tsk
->dependers
->n_elem
= 1;
309 tsk
->dependers
->allocated
= 6;
310 tsk
->dependers
->elem
[0] = task
;
311 task
->num_dependees
++;
314 /* We already have some other dependency on tsk from
315 earlier depend clause. */
316 else if (tsk
->dependers
->n_elem
317 && (tsk
->dependers
->elem
[tsk
->dependers
->n_elem
320 else if (tsk
->dependers
->n_elem
321 == tsk
->dependers
->allocated
)
323 tsk
->dependers
->allocated
324 = tsk
->dependers
->allocated
* 2 + 2;
326 = gomp_realloc (tsk
->dependers
,
327 sizeof (struct gomp_dependers_vec
)
328 + (tsk
->dependers
->allocated
329 * sizeof (struct gomp_task
*)));
331 tsk
->dependers
->elem
[tsk
->dependers
->n_elem
++] = task
;
332 task
->num_dependees
++;
336 ent
->redundant
= true;
340 *slot
= HTAB_EMPTY_ENTRY
;
342 if (htab_size (parent
->depend_hash
) <= 32)
344 parent
->depend_hash
->n_elements
= 0;
345 parent
->depend_hash
->n_deleted
= 0;
349 /* Shrink the hash table if it would be too large.
350 We don't want to walk e.g. megabytes of empty hash
351 table for every depend(inout: omp_all_memory). */
352 free (parent
->depend_hash
);
353 parent
->depend_hash
= htab_create (12);
356 parent
->depend_all_memory
= task
;
359 task
->depend_count
= ndepend
;
360 if (parent
->depend_hash
== NULL
)
361 parent
->depend_hash
= htab_create (2 * ndepend
> 12 ? 2 * ndepend
: 12);
362 for (i
= 0; i
< ndepend
; i
++)
364 task
->depend
[i
].next
= NULL
;
365 task
->depend
[i
].prev
= NULL
;
366 task
->depend
[i
].task
= task
;
367 task
->depend
[i
].redundant
= false;
368 task
->depend
[i
].redundant_out
= false;
370 hash_entry_type
*slot
= htab_find_slot (&parent
->depend_hash
,
371 &task
->depend
[i
], INSERT
);
372 hash_entry_type out
= NULL
, last
= NULL
;
375 /* If multiple depends on the same task are the same, all but the
376 first one are redundant. As inout/out come first, if any of them
377 is inout/out, it will win, which is the right semantics. */
378 if ((*slot
)->task
== task
)
380 task
->depend
[i
].redundant
= true;
383 for (ent
= *slot
; ent
; ent
= ent
->next
)
385 if (ent
->redundant_out
)
390 /* depend(in:...) doesn't depend on earlier depend(in:...).
391 Similarly depend(inoutset:...) doesn't depend on earlier
392 depend(inoutset:...). */
393 if (task
->depend
[i
].is_in
&& task
->depend
[i
].is_in
== ent
->is_in
)
399 struct gomp_task
*tsk
= ent
->task
;
400 if (tsk
->dependers
== NULL
)
403 = gomp_malloc (sizeof (struct gomp_dependers_vec
)
404 + 6 * sizeof (struct gomp_task
*));
405 tsk
->dependers
->n_elem
= 1;
406 tsk
->dependers
->allocated
= 6;
407 tsk
->dependers
->elem
[0] = task
;
408 task
->num_dependees
++;
411 /* We already have some other dependency on tsk from an earlier depend clause. */
413 else if (tsk
->dependers
->n_elem
414 && (tsk
->dependers
->elem
[tsk
->dependers
->n_elem
- 1]
417 else if (tsk
->dependers
->n_elem
== tsk
->dependers
->allocated
)
419 tsk
->dependers
->allocated
420 = tsk
->dependers
->allocated
* 2 + 2;
422 = gomp_realloc (tsk
->dependers
,
423 sizeof (struct gomp_dependers_vec
)
424 + (tsk
->dependers
->allocated
425 * sizeof (struct gomp_task
*)));
427 tsk
->dependers
->elem
[tsk
->dependers
->n_elem
++] = task
;
428 task
->num_dependees
++;
430 task
->depend
[i
].next
= *slot
;
431 (*slot
)->prev
= &task
->depend
[i
];
433 *slot
= &task
->depend
[i
];
435 /* There is no need to store more than one depend({,in}out:) task per
436 address in the hash table chain for the purpose of creation of
437 deferred tasks, because each out depends on all earlier outs, thus it
438 is enough to record just the last depend({,in}out:). For depend(in:),
439 we need to keep all of the previous ones not terminated yet, because
440 a later depend({,in}out:) might need to depend on all of them. So, if
441 the new task's clause is depend({,in}out:), we know there is at most
442 one other depend({,in}out:) clause in the list (out). For
443 non-deferred tasks we want to see all outs, so they are moved to the
444 end of the chain, after first redundant_out entry all following
445 entries should be redundant_out. */
446 if (!task
->depend
[i
].is_in
&& out
)
450 out
->next
->prev
= out
->prev
;
451 out
->prev
->next
= out
->next
;
452 out
->next
= last
->next
;
456 out
->next
->prev
= out
;
458 out
->redundant_out
= true;
463 /* Body of empty task like taskwait nowait depend. */
466 empty_task (void *data
__attribute__((unused
)))
470 static void gomp_task_run_post_handle_depend_hash (struct gomp_task
*);
471 static inline size_t gomp_task_run_post_handle_depend (struct gomp_task
*,
474 /* Called when encountering an explicit task directive. If IF_CLAUSE is
475 false, then we must not delay in executing the task. If UNTIED is true,
476 then the task may be executed by any member of the team.
478 DEPEND is an array containing:
479 if depend[0] is non-zero, then:
480 depend[0]: number of depend elements.
481 depend[1]: number of depend elements of type "out/inout".
482 depend[2..N+1]: address of [1..N]th depend element.
483 otherwise, when depend[0] is zero, then:
484 depend[1]: number of depend elements.
485 depend[2]: number of depend elements of type "out/inout".
486 depend[3]: number of depend elements of type "mutexinoutset".
487 depend[4]: number of depend elements of type "in".
488 depend[5..4+depend[2]+depend[3]+depend[4]]: address of depend elements
489 depend[5+depend[2]+depend[3]+depend[4]..4+depend[1]]: address of
490 omp_depend_t objects. */
493 GOMP_task (void (*fn
) (void *), void *data
, void (*cpyfn
) (void *, void *),
494 long arg_size
, long arg_align
, bool if_clause
, unsigned flags
,
495 void **depend
, int priority_arg
, void *detach
)
497 struct gomp_thread
*thr
= gomp_thread ();
498 struct gomp_team
*team
= thr
->ts
.team
;
501 #ifdef HAVE_BROKEN_POSIX_SEMAPHORES
502 /* If pthread_mutex_* is used for omp_*lock*, then each task must be
503 tied to one thread all the time. This means UNTIED tasks must be
504 tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
505 might be running on different thread than FN. */
508 flags
&= ~GOMP_TASK_FLAG_UNTIED
;
511 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
512 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
514 if (gomp_team_barrier_cancelled (&team
->barrier
))
516 if (thr
->task
->taskgroup
)
518 if (thr
->task
->taskgroup
->cancelled
)
520 if (thr
->task
->taskgroup
->workshare
521 && thr
->task
->taskgroup
->prev
522 && thr
->task
->taskgroup
->prev
->cancelled
)
527 if (__builtin_expect ((flags
& GOMP_TASK_FLAG_PRIORITY
) != 0, 0))
529 priority
= priority_arg
;
530 if (priority
> gomp_max_task_priority_var
)
531 priority
= gomp_max_task_priority_var
;
534 if (!if_clause
|| team
== NULL
535 || (thr
->task
&& thr
->task
->final_task
)
536 || team
->task_count
> 64 * team
->nthreads
)
538 struct gomp_task task
;
539 gomp_sem_t completion_sem
;
541 /* If there are depend clauses and earlier deferred sibling tasks
542 with depend clauses, check if there isn't a dependency. If there
543 is, we need to wait for them. There is no need to handle
544 depend clauses for non-deferred tasks other than this, because
545 the parent task is suspended until the child task finishes and thus
546 it can't start further child tasks. */
547 if ((flags
& GOMP_TASK_FLAG_DEPEND
)
548 && thr
->task
&& thr
->task
->depend_hash
)
549 gomp_task_maybe_wait_for_dependencies (depend
);
551 gomp_init_task (&task
, thr
->task
, gomp_icv (false));
552 task
.kind
= GOMP_TASK_UNDEFERRED
;
553 task
.final_task
= (thr
->task
&& thr
->task
->final_task
)
554 || (flags
& GOMP_TASK_FLAG_FINAL
);
555 task
.priority
= priority
;
557 if ((flags
& GOMP_TASK_FLAG_DETACH
) != 0)
559 gomp_sem_init (&completion_sem
, 0);
560 task
.completion_sem
= &completion_sem
;
561 *(void **) detach
= &task
;
563 *(void **) data
= &task
;
565 gomp_debug (0, "Thread %d: new event: %p\n",
566 thr
->ts
.team_id
, &task
);
571 task
.in_tied_task
= thr
->task
->in_tied_task
;
572 task
.taskgroup
= thr
->task
->taskgroup
;
575 if (__builtin_expect (cpyfn
!= NULL
, 0))
577 char buf
[arg_size
+ arg_align
- 1];
578 char *arg
= (char *) (((uintptr_t) buf
+ arg_align
- 1)
579 & ~(uintptr_t) (arg_align
- 1));
586 if ((flags
& GOMP_TASK_FLAG_DETACH
) != 0)
588 gomp_sem_wait (&completion_sem
);
589 gomp_sem_destroy (&completion_sem
);
592 /* Access to "children" is normally done inside a task_lock
593 mutex region, but the only way this particular task.children
594 can be set is if this thread's task work function (fn)
595 creates children. So since the setter is *this* thread, we
596 need no barriers here when testing for non-NULL. We can have
597 task.children set by the current thread then changed by a
598 child thread, but seeing a stale non-NULL value is not a
599 problem. Once past the task_lock acquisition, this thread
600 will see the real value of task.children. */
601 if (!priority_queue_empty_p (&task
.children_queue
, MEMMODEL_RELAXED
))
603 gomp_mutex_lock (&team
->task_lock
);
604 gomp_clear_parent (&task
.children_queue
);
605 gomp_mutex_unlock (&team
->task_lock
);
611 struct gomp_task
*task
;
612 struct gomp_task
*parent
= thr
->task
;
613 struct gomp_taskgroup
*taskgroup
= parent
->taskgroup
;
616 size_t depend_size
= 0;
618 if (flags
& GOMP_TASK_FLAG_DEPEND
)
619 depend_size
= ((uintptr_t) (depend
[0] ? depend
[0] : depend
[1])
620 * sizeof (struct gomp_task_depend_entry
));
621 task
= gomp_malloc (sizeof (*task
) + depend_size
622 + arg_size
+ arg_align
- 1);
623 arg
= (char *) (((uintptr_t) (task
+ 1) + depend_size
+ arg_align
- 1)
624 & ~(uintptr_t) (arg_align
- 1));
625 gomp_init_task (task
, parent
, gomp_icv (false));
626 task
->priority
= priority
;
627 task
->kind
= GOMP_TASK_UNDEFERRED
;
628 task
->in_tied_task
= parent
->in_tied_task
;
629 task
->taskgroup
= taskgroup
;
630 task
->deferred_p
= true;
631 if ((flags
& GOMP_TASK_FLAG_DETACH
) != 0)
633 task
->detach_team
= team
;
635 *(void **) detach
= task
;
637 *(void **) data
= task
;
639 gomp_debug (0, "Thread %d: new event: %p\n", thr
->ts
.team_id
, task
);
645 task
->copy_ctors_done
= true;
648 memcpy (arg
, data
, arg_size
);
650 task
->kind
= GOMP_TASK_WAITING
;
653 task
->final_task
= (flags
& GOMP_TASK_FLAG_FINAL
) >> 1;
654 gomp_mutex_lock (&team
->task_lock
);
655 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
657 if (__builtin_expect (gomp_cancel_var
, 0)
658 && !task
->copy_ctors_done
)
660 if (gomp_team_barrier_cancelled (&team
->barrier
))
663 gomp_mutex_unlock (&team
->task_lock
);
664 gomp_finish_task (task
);
670 if (taskgroup
->cancelled
)
672 if (taskgroup
->workshare
674 && taskgroup
->prev
->cancelled
)
679 taskgroup
->num_children
++;
682 gomp_task_handle_depend (task
, parent
, depend
);
683 if (task
->num_dependees
)
685 /* Tasks that depend on other tasks are not put into the
686 various waiting queues, so we are done for now. Said
687 tasks are instead put into the queues via
688 gomp_task_run_post_handle_dependers() after their
689 dependencies have been satisfied. After which, they
690 can be picked up by the various scheduling
692 gomp_mutex_unlock (&team
->task_lock
);
695 /* Check for taskwait nowait depend which doesn't need to wait for
697 if (__builtin_expect (fn
== empty_task
, 0))
700 taskgroup
->num_children
--;
701 gomp_task_run_post_handle_depend_hash (task
);
702 gomp_mutex_unlock (&team
->task_lock
);
703 gomp_finish_task (task
);
709 priority_queue_insert (PQ_CHILDREN
, &parent
->children_queue
,
711 PRIORITY_INSERT_BEGIN
,
712 /*adjust_parent_depends_on=*/false,
713 task
->parent_depends_on
);
715 priority_queue_insert (PQ_TASKGROUP
, &taskgroup
->taskgroup_queue
,
717 PRIORITY_INSERT_BEGIN
,
718 /*adjust_parent_depends_on=*/false,
719 task
->parent_depends_on
);
721 priority_queue_insert (PQ_TEAM
, &team
->task_queue
,
724 /*adjust_parent_depends_on=*/false,
725 task
->parent_depends_on
);
728 ++team
->task_queued_count
;
729 gomp_team_barrier_set_task_pending (&team
->barrier
);
730 do_wake
= team
->task_running_count
+ !parent
->in_tied_task
732 gomp_mutex_unlock (&team
->task_lock
);
734 gomp_team_barrier_wake (&team
->barrier
, 1);
739 ialias (GOMP_taskgroup_start
)
740 ialias (GOMP_taskgroup_end
)
741 ialias (GOMP_taskgroup_reduction_register
)
744 #define UTYPE unsigned long
745 #define TYPE_is_long 1
746 #include "taskloop.c"
751 #define TYPE unsigned long long
753 #define GOMP_taskloop GOMP_taskloop_ull
754 #include "taskloop.c"
760 priority_queue_move_task_first (enum priority_queue_type type
,
761 struct priority_queue
*head
,
762 struct gomp_task
*task
)
764 #if _LIBGOMP_CHECKING_
765 if (!priority_queue_task_in_queue_p (type
, head
, task
))
766 gomp_fatal ("Attempt to move first missing task %p", task
);
768 struct priority_list
*list
;
769 if (priority_queue_multi_p (head
))
771 list
= priority_queue_lookup_priority (head
, task
->priority
);
772 #if _LIBGOMP_CHECKING_
774 gomp_fatal ("Unable to find priority %d", task
->priority
);
779 priority_list_remove (list
, task_to_priority_node (type
, task
), 0);
780 priority_list_insert (type
, list
, task
, task
->priority
,
781 PRIORITY_INSERT_BEGIN
, type
== PQ_CHILDREN
,
782 task
->parent_depends_on
);
785 /* Actual body of GOMP_PLUGIN_target_task_completion that is executed
786 with team->task_lock held, or is executed in the thread that called
787 gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
788 run before it acquires team->task_lock. */
791 gomp_target_task_completion (struct gomp_team
*team
, struct gomp_task
*task
)
793 struct gomp_task
*parent
= task
->parent
;
795 priority_queue_move_task_first (PQ_CHILDREN
, &parent
->children_queue
,
798 struct gomp_taskgroup
*taskgroup
= task
->taskgroup
;
800 priority_queue_move_task_first (PQ_TASKGROUP
, &taskgroup
->taskgroup_queue
,
803 priority_queue_insert (PQ_TEAM
, &team
->task_queue
, task
, task
->priority
,
804 PRIORITY_INSERT_BEGIN
, false,
805 task
->parent_depends_on
);
806 task
->kind
= GOMP_TASK_WAITING
;
807 if (parent
&& parent
->taskwait
)
809 if (parent
->taskwait
->in_taskwait
)
811 /* One more task has had its dependencies met.
812 Inform any waiters. */
813 parent
->taskwait
->in_taskwait
= false;
814 gomp_sem_post (&parent
->taskwait
->taskwait_sem
);
816 else if (parent
->taskwait
->in_depend_wait
)
818 /* One more task has had its dependencies met.
819 Inform any waiters. */
820 parent
->taskwait
->in_depend_wait
= false;
821 gomp_sem_post (&parent
->taskwait
->taskwait_sem
);
824 if (taskgroup
&& taskgroup
->in_taskgroup_wait
)
826 /* One more task has had its dependencies met.
827 Inform any waiters. */
828 taskgroup
->in_taskgroup_wait
= false;
829 gomp_sem_post (&taskgroup
->taskgroup_sem
);
832 ++team
->task_queued_count
;
833 gomp_team_barrier_set_task_pending (&team
->barrier
);
834 /* I'm afraid this can't be done after releasing team->task_lock,
835 as gomp_target_task_completion is run from unrelated thread and
836 therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
837 the team could be gone already. */
838 if (team
->nthreads
> team
->task_running_count
)
839 gomp_team_barrier_wake (&team
->barrier
, 1);
842 /* Signal that a target task TTASK has completed the asynchronously
843 running phase and should be requeued as a task to handle the
844 variable unmapping. */
847 GOMP_PLUGIN_target_task_completion (void *data
)
849 struct gomp_target_task
*ttask
= (struct gomp_target_task
*) data
;
850 struct gomp_task
*task
= ttask
->task
;
851 struct gomp_team
*team
= ttask
->team
;
853 gomp_mutex_lock (&team
->task_lock
);
854 if (ttask
->state
== GOMP_TARGET_TASK_READY_TO_RUN
)
856 ttask
->state
= GOMP_TARGET_TASK_FINISHED
;
857 gomp_mutex_unlock (&team
->task_lock
);
860 ttask
->state
= GOMP_TARGET_TASK_FINISHED
;
861 gomp_target_task_completion (team
, task
);
862 gomp_mutex_unlock (&team
->task_lock
);
865 /* Called for nowait target tasks. */
868 gomp_create_target_task (struct gomp_device_descr
*devicep
,
869 void (*fn
) (void *), size_t mapnum
, void **hostaddrs
,
870 size_t *sizes
, unsigned short *kinds
,
871 unsigned int flags
, void **depend
, void **args
,
872 enum gomp_target_task_state state
)
874 struct gomp_thread
*thr
= gomp_thread ();
875 struct gomp_team
*team
= thr
->ts
.team
;
877 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
878 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
880 if (gomp_team_barrier_cancelled (&team
->barrier
))
882 if (thr
->task
->taskgroup
)
884 if (thr
->task
->taskgroup
->cancelled
)
886 if (thr
->task
->taskgroup
->workshare
887 && thr
->task
->taskgroup
->prev
888 && thr
->task
->taskgroup
->prev
->cancelled
)
893 struct gomp_target_task
*ttask
;
894 struct gomp_task
*task
;
895 struct gomp_task
*parent
= thr
->task
;
896 struct gomp_taskgroup
*taskgroup
= parent
->taskgroup
;
898 size_t depend_size
= 0;
899 uintptr_t depend_cnt
= 0;
900 size_t tgt_align
= 0, tgt_size
= 0;
901 uintptr_t args_cnt
= 0;
905 depend_cnt
= (uintptr_t) (depend
[0] ? depend
[0] : depend
[1]);
906 depend_size
= depend_cnt
* sizeof (struct gomp_task_depend_entry
);
910 /* GOMP_MAP_FIRSTPRIVATE need to be copied first, as they are
911 firstprivate on the target task. */
913 for (i
= 0; i
< mapnum
; i
++)
914 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
916 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
917 if (tgt_align
< align
)
919 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
920 tgt_size
+= sizes
[i
];
923 tgt_size
+= tgt_align
- 1;
931 intptr_t id
= (intptr_t) *cargs
++;
932 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
935 args_cnt
= cargs
+ 1 - args
;
939 task
= gomp_malloc (sizeof (*task
) + depend_size
941 + args_cnt
* sizeof (void *)
942 + mapnum
* (sizeof (void *) + sizeof (size_t)
943 + sizeof (unsigned short))
945 gomp_init_task (task
, parent
, gomp_icv (false));
947 task
->kind
= GOMP_TASK_WAITING
;
948 task
->in_tied_task
= parent
->in_tied_task
;
949 task
->taskgroup
= taskgroup
;
950 ttask
= (struct gomp_target_task
*) &task
->depend
[depend_cnt
];
951 ttask
->devicep
= devicep
;
953 ttask
->mapnum
= mapnum
;
954 memcpy (ttask
->hostaddrs
, hostaddrs
, mapnum
* sizeof (void *));
957 ttask
->args
= (void **) &ttask
->hostaddrs
[mapnum
];
958 memcpy (ttask
->args
, args
, args_cnt
* sizeof (void *));
959 ttask
->sizes
= (size_t *) &ttask
->args
[args_cnt
];
964 ttask
->sizes
= (size_t *) &ttask
->hostaddrs
[mapnum
];
966 memcpy (ttask
->sizes
, sizes
, mapnum
* sizeof (size_t));
967 ttask
->kinds
= (unsigned short *) &ttask
->sizes
[mapnum
];
968 memcpy (ttask
->kinds
, kinds
, mapnum
* sizeof (unsigned short));
971 char *tgt
= (char *) &ttask
->kinds
[mapnum
];
973 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
975 tgt
+= tgt_align
- al
;
977 for (i
= 0; i
< mapnum
; i
++)
978 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
980 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
981 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
982 memcpy (tgt
+ tgt_size
, hostaddrs
[i
], sizes
[i
]);
983 ttask
->hostaddrs
[i
] = tgt
+ tgt_size
;
984 tgt_size
= tgt_size
+ sizes
[i
];
987 ttask
->flags
= flags
;
988 ttask
->state
= state
;
992 task
->fn_data
= ttask
;
993 task
->final_task
= 0;
994 gomp_mutex_lock (&team
->task_lock
);
995 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
996 if (__builtin_expect (gomp_cancel_var
, 0))
998 if (gomp_team_barrier_cancelled (&team
->barrier
))
1001 gomp_mutex_unlock (&team
->task_lock
);
1002 gomp_finish_task (task
);
1008 if (taskgroup
->cancelled
)
1010 if (taskgroup
->workshare
1012 && taskgroup
->prev
->cancelled
)
1018 gomp_task_handle_depend (task
, parent
, depend
);
1019 if (task
->num_dependees
)
1022 taskgroup
->num_children
++;
1023 gomp_mutex_unlock (&team
->task_lock
);
1027 if (state
== GOMP_TARGET_TASK_DATA
)
1029 gomp_task_run_post_handle_depend_hash (task
);
1030 gomp_mutex_unlock (&team
->task_lock
);
1031 gomp_finish_task (task
);
1036 taskgroup
->num_children
++;
1037 /* For async offloading, if we don't need to wait for dependencies,
1038 run the gomp_target_task_fn right away, essentially schedule the
1039 mapping part of the task in the current thread. */
1041 && (devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
1043 priority_queue_insert (PQ_CHILDREN
, &parent
->children_queue
, task
, 0,
1044 PRIORITY_INSERT_END
,
1045 /*adjust_parent_depends_on=*/false,
1046 task
->parent_depends_on
);
1048 priority_queue_insert (PQ_TASKGROUP
, &taskgroup
->taskgroup_queue
,
1049 task
, 0, PRIORITY_INSERT_END
,
1050 /*adjust_parent_depends_on=*/false,
1051 task
->parent_depends_on
);
1052 task
->pnode
[PQ_TEAM
].next
= NULL
;
1053 task
->pnode
[PQ_TEAM
].prev
= NULL
;
1054 task
->kind
= GOMP_TASK_TIED
;
1056 gomp_mutex_unlock (&team
->task_lock
);
1059 gomp_target_task_fn (task
->fn_data
);
1062 gomp_mutex_lock (&team
->task_lock
);
1063 task
->kind
= GOMP_TASK_ASYNC_RUNNING
;
1064 /* If GOMP_PLUGIN_target_task_completion has run already
1065 in between gomp_target_task_fn and the mutex lock,
1066 perform the requeuing here. */
1067 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
1068 gomp_target_task_completion (team
, task
);
1070 ttask
->state
= GOMP_TARGET_TASK_RUNNING
;
1071 gomp_mutex_unlock (&team
->task_lock
);
1074 priority_queue_insert (PQ_CHILDREN
, &parent
->children_queue
, task
, 0,
1075 PRIORITY_INSERT_BEGIN
,
1076 /*adjust_parent_depends_on=*/false,
1077 task
->parent_depends_on
);
1079 priority_queue_insert (PQ_TASKGROUP
, &taskgroup
->taskgroup_queue
, task
, 0,
1080 PRIORITY_INSERT_BEGIN
,
1081 /*adjust_parent_depends_on=*/false,
1082 task
->parent_depends_on
);
1083 priority_queue_insert (PQ_TEAM
, &team
->task_queue
, task
, 0,
1084 PRIORITY_INSERT_END
,
1085 /*adjust_parent_depends_on=*/false,
1086 task
->parent_depends_on
);
1088 ++team
->task_queued_count
;
1089 gomp_team_barrier_set_task_pending (&team
->barrier
);
1090 do_wake
= team
->task_running_count
+ !parent
->in_tied_task
1092 gomp_mutex_unlock (&team
->task_lock
);
1094 gomp_team_barrier_wake (&team
->barrier
, 1);
1098 /* Given a parent_depends_on task in LIST, move it to the front of its
1099 priority so it is run as soon as possible.
1101 Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.
1103 We rearrange the queue such that all parent_depends_on tasks are
1104 first, and last_parent_depends_on points to the last such task we
1105 rearranged. For example, given the following tasks in a queue
1106 where PD[123] are the parent_depends_on tasks:
1111 C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4
1113 We rearrange such that:
1116 | +--- last_parent_depends_on
1119 PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4. */
1122 priority_list_upgrade_task (struct priority_list
*list
,
1123 struct priority_node
*node
)
1125 struct priority_node
*last_parent_depends_on
1126 = list
->last_parent_depends_on
;
1127 if (last_parent_depends_on
)
1129 node
->prev
->next
= node
->next
;
1130 node
->next
->prev
= node
->prev
;
1131 node
->prev
= last_parent_depends_on
;
1132 node
->next
= last_parent_depends_on
->next
;
1133 node
->prev
->next
= node
;
1134 node
->next
->prev
= node
;
1136 else if (node
!= list
->tasks
)
1138 node
->prev
->next
= node
->next
;
1139 node
->next
->prev
= node
->prev
;
1140 node
->prev
= list
->tasks
->prev
;
1141 node
->next
= list
->tasks
;
1143 node
->prev
->next
= node
;
1144 node
->next
->prev
= node
;
1146 list
->last_parent_depends_on
= node
;
1149 /* Given a parent_depends_on TASK in its parent's children_queue, move
1150 it to the front of its priority so it is run as soon as possible.
1152 PARENT is passed as an optimization.
1154 (This function could be defined in priority_queue.c, but we want it
1155 inlined, and putting it in priority_queue.h is not an option, given
1156 that gomp_task has not been properly defined at that point). */
1159 priority_queue_upgrade_task (struct gomp_task
*task
,
1160 struct gomp_task
*parent
)
1162 struct priority_queue
*head
= &parent
->children_queue
;
1163 struct priority_node
*node
= &task
->pnode
[PQ_CHILDREN
];
1164 #if _LIBGOMP_CHECKING_
1165 if (!task
->parent_depends_on
)
1166 gomp_fatal ("priority_queue_upgrade_task: task must be a "
1167 "parent_depends_on task");
1168 if (!priority_queue_task_in_queue_p (PQ_CHILDREN
, head
, task
))
1169 gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task
);
1171 if (priority_queue_multi_p (head
))
1173 struct priority_list
*list
1174 = priority_queue_lookup_priority (head
, task
->priority
);
1175 priority_list_upgrade_task (list
, node
);
1178 priority_list_upgrade_task (&head
->l
, node
);
1181 /* Given a CHILD_TASK in LIST that is about to be executed, move it out of
1182 the way in LIST so that other tasks can be considered for
1183 execution. LIST contains tasks of type TYPE.
1185 Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
1189 priority_list_downgrade_task (enum priority_queue_type type
,
1190 struct priority_list
*list
,
1191 struct gomp_task
*child_task
)
1193 struct priority_node
*node
= task_to_priority_node (type
, child_task
);
1194 if (list
->tasks
== node
)
1195 list
->tasks
= node
->next
;
1196 else if (node
->next
!= list
->tasks
)
1198 /* The task in NODE is about to become TIED and TIED tasks
1199 cannot come before WAITING tasks. If we're about to
1200 leave the queue in such an indeterminate state, rewire
1201 things appropriately. However, a TIED task at the end is
1203 struct gomp_task
*next_task
= priority_node_to_task (type
, node
->next
);
1204 if (next_task
->kind
== GOMP_TASK_WAITING
)
1206 /* Remove from list. */
1207 node
->prev
->next
= node
->next
;
1208 node
->next
->prev
= node
->prev
;
1209 /* Rewire at the end. */
1210 node
->next
= list
->tasks
;
1211 node
->prev
= list
->tasks
->prev
;
1212 list
->tasks
->prev
->next
= node
;
1213 list
->tasks
->prev
= node
;
1217 /* If the current task is the last_parent_depends_on for its
1218 priority, adjust last_parent_depends_on appropriately. */
1219 if (__builtin_expect (child_task
->parent_depends_on
, 0)
1220 && list
->last_parent_depends_on
== node
)
1222 struct gomp_task
*prev_child
= priority_node_to_task (type
, node
->prev
);
1223 if (node
->prev
!= node
1224 && prev_child
->kind
== GOMP_TASK_WAITING
1225 && prev_child
->parent_depends_on
)
1226 list
->last_parent_depends_on
= node
->prev
;
1229 /* There are no more parent_depends_on entries waiting
1230 to run, clear the list. */
1231 list
->last_parent_depends_on
= NULL
;
1236 /* Given a TASK in HEAD that is about to be executed, move it out of
1237 the way so that other tasks can be considered for execution. HEAD
1238 contains tasks of type TYPE.
1240 Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
1243 (This function could be defined in priority_queue.c, but we want it
1244 inlined, and putting it in priority_queue.h is not an option, given
1245 that gomp_task has not been properly defined at that point). */
1248 priority_queue_downgrade_task (enum priority_queue_type type
,
1249 struct priority_queue
*head
,
1250 struct gomp_task
*task
)
1252 #if _LIBGOMP_CHECKING_
1253 if (!priority_queue_task_in_queue_p (type
, head
, task
))
1254 gomp_fatal ("Attempt to downgrade missing task %p", task
);
1256 if (priority_queue_multi_p (head
))
1258 struct priority_list
*list
1259 = priority_queue_lookup_priority (head
, task
->priority
);
1260 priority_list_downgrade_task (type
, list
, task
);
1263 priority_list_downgrade_task (type
, &head
->l
, task
);
1266 /* Setup CHILD_TASK to execute. This is done by setting the task to
1267 TIED, and updating all relevant queues so that CHILD_TASK is no
1268 longer chosen for scheduling. Also, remove CHILD_TASK from the
1269 overall team task queue entirely.
1271 Return TRUE if task or its containing taskgroup has been
1275 gomp_task_run_pre (struct gomp_task
*child_task
, struct gomp_task
*parent
,
1276 struct gomp_team
*team
)
1278 #if _LIBGOMP_CHECKING_
1279 if (child_task
->parent
)
1280 priority_queue_verify (PQ_CHILDREN
,
1281 &child_task
->parent
->children_queue
, true);
1282 if (child_task
->taskgroup
)
1283 priority_queue_verify (PQ_TASKGROUP
,
1284 &child_task
->taskgroup
->taskgroup_queue
, false);
1285 priority_queue_verify (PQ_TEAM
, &team
->task_queue
, false);
1288 /* Task is about to go tied, move it out of the way. */
1290 priority_queue_downgrade_task (PQ_CHILDREN
, &parent
->children_queue
,
1293 /* Task is about to go tied, move it out of the way. */
1294 struct gomp_taskgroup
*taskgroup
= child_task
->taskgroup
;
1296 priority_queue_downgrade_task (PQ_TASKGROUP
, &taskgroup
->taskgroup_queue
,
1299 priority_queue_remove (PQ_TEAM
, &team
->task_queue
, child_task
,
1301 child_task
->pnode
[PQ_TEAM
].next
= NULL
;
1302 child_task
->pnode
[PQ_TEAM
].prev
= NULL
;
1303 child_task
->kind
= GOMP_TASK_TIED
;
1305 if (--team
->task_queued_count
== 0)
1306 gomp_team_barrier_clear_task_pending (&team
->barrier
);
1307 if (__builtin_expect (gomp_cancel_var
, 0)
1308 && !child_task
->copy_ctors_done
)
1310 if (gomp_team_barrier_cancelled (&team
->barrier
))
1314 if (taskgroup
->cancelled
)
1316 if (taskgroup
->workshare
1318 && taskgroup
->prev
->cancelled
)
1326 gomp_task_run_post_handle_depend_hash (struct gomp_task
*child_task
)
1328 struct gomp_task
*parent
= child_task
->parent
;
1331 if (parent
->depend_all_memory
== child_task
)
1332 parent
->depend_all_memory
= NULL
;
1333 for (i
= 0; i
< child_task
->depend_count
; i
++)
1334 if (!child_task
->depend
[i
].redundant
)
1336 if (child_task
->depend
[i
].next
)
1337 child_task
->depend
[i
].next
->prev
= child_task
->depend
[i
].prev
;
1338 if (child_task
->depend
[i
].prev
)
1339 child_task
->depend
[i
].prev
->next
= child_task
->depend
[i
].next
;
1342 hash_entry_type
*slot
1343 = htab_find_slot (&parent
->depend_hash
, &child_task
->depend
[i
],
1345 if (*slot
!= &child_task
->depend
[i
])
1347 if (child_task
->depend
[i
].next
)
1348 *slot
= child_task
->depend
[i
].next
;
1350 htab_clear_slot (parent
->depend_hash
, slot
);
1355 /* After a CHILD_TASK has been run, adjust the dependency queue for
1356 each task that depends on CHILD_TASK, to record the fact that there
1357 is one less dependency to worry about. If a task that depended on
1358 CHILD_TASK now has no dependencies, place it in the various queues
1359 so it gets scheduled to run.
1361 TEAM is the team to which CHILD_TASK belongs. */
/* Implementation notes: the loop below visits the COUNT entries of
   CHILD_TASK->dependers and pre-decrements each depender's
   num_dependees.  PARENT is CHILD_TASK's parent; the code compares it
   against nothing visible here but dereferences it when queuing into
   its children_queue.  RET is a size_t initialised to 0.
   NOTE(review): RET presumably counts the tasks that became runnable
   and is the return value -- confirm against the full function.  */
1364 gomp_task_run_post_handle_dependers (struct gomp_task
*child_task
,
1365 struct gomp_team
*team
)
1367 struct gomp_task
*parent
= child_task
->parent
;
1368 size_t i
, count
= child_task
->dependers
->n_elem
, ret
= 0;
1369 for (i
= 0; i
< count
; i
++)
1371 struct gomp_task
*task
= child_task
->dependers
->elem
[i
];
1373 /* CHILD_TASK satisfies a dependency for TASK. Keep track of
1374 TASK's remaining dependencies. Once TASK has no other
1375 dependencies, put it into the various queues so it will get
1376 scheduled for execution. */
1377 if (--task
->num_dependees
!= 0)
1380 struct gomp_taskgroup
*taskgroup
= task
->taskgroup
;
/* Dependers whose function is empty_task are recognised here.
   NOTE(review): they appear to be completed in place (dependence
   post-processing, taskgroup accounting, gomp_finish_task) instead of
   being queued -- confirm the exact branch structure.  */
1381 if (__builtin_expect (task
->fn
== empty_task
, 0))
1384 task
->parent
= NULL
;
1385 else if (__builtin_expect (task
->parent_depends_on
, 0)
1386 && --parent
->taskwait
->n_depend
== 0
1387 && parent
->taskwait
->in_depend_wait
)
1389 parent
->taskwait
->in_depend_wait
= false;
1390 gomp_sem_post (&parent
->taskwait
->taskwait_sem
);
1392 if (gomp_task_run_post_handle_depend (task
, team
))
1396 if (taskgroup
->num_children
> 1)
1397 --taskgroup
->num_children
;
1400 __atomic_store_n (&taskgroup
->num_children
, 0,
1402 if (taskgroup
->in_taskgroup_wait
)
1404 taskgroup
->in_taskgroup_wait
= false;
1405 gomp_sem_post (&taskgroup
->taskgroup_sem
);
1409 gomp_finish_task (task
);
/* Queue TASK at the front of its priority in the parent's children
   queue, letting the queue account for parent_depends_on tasks.  */
1415 priority_queue_insert (PQ_CHILDREN
, &parent
->children_queue
,
1416 task
, task
->priority
,
1417 PRIORITY_INSERT_BEGIN
,
1418 /*adjust_parent_depends_on=*/true,
1419 task
->parent_depends_on
);
1420 if (parent
->taskwait
)
1422 if (parent
->taskwait
->in_taskwait
)
1424 /* One more task has had its dependencies met.
1425 Inform any waiters. */
1426 parent
->taskwait
->in_taskwait
= false;
1427 gomp_sem_post (&parent
->taskwait
->taskwait_sem
);
1429 else if (parent
->taskwait
->in_depend_wait
)
1431 /* One more task has had its dependencies met.
1432 Inform any waiters. */
1433 parent
->taskwait
->in_depend_wait
= false;
1434 gomp_sem_post (&parent
->taskwait
->taskwait_sem
);
1439 task
->parent
= NULL
;
/* Same for the taskgroup queue, without parent_depends_on adjustment.  */
1442 priority_queue_insert (PQ_TASKGROUP
, &taskgroup
->taskgroup_queue
,
1443 task
, task
->priority
,
1444 PRIORITY_INSERT_BEGIN
,
1445 /*adjust_parent_depends_on=*/false,
1446 task
->parent_depends_on
);
1447 if (taskgroup
->in_taskgroup_wait
)
1449 /* One more task has had its dependencies met.
1450 Inform any waiters. */
1451 taskgroup
->in_taskgroup_wait
= false;
1452 gomp_sem_post (&taskgroup
->taskgroup_sem
);
/* In the team queue the task goes to the end of its priority.  */
1455 priority_queue_insert (PQ_TEAM
, &team
->task_queue
,
1456 task
, task
->priority
,
1457 PRIORITY_INSERT_END
,
1458 /*adjust_parent_depends_on=*/false,
1459 task
->parent_depends_on
);
1461 ++team
->task_queued_count
;
/* Every depender has been visited: release the vector and clear the
   pointer so it cannot be used again.  */
1464 free (child_task
->dependers
);
1465 child_task
->dependers
= NULL
;
1467 gomp_team_barrier_set_task_pending (&team
->barrier
);
1471 static inline size_t
1472 gomp_task_run_post_handle_depend (struct gomp_task
*child_task
,
1473 struct gomp_team
*team
)
1475 if (child_task
->depend_count
== 0)
1478 /* If parent is gone already, the hash table is freed and nothing
1479 will use the hash table anymore, no need to remove anything from it. */
1480 if (child_task
->parent
!= NULL
)
1481 gomp_task_run_post_handle_depend_hash (child_task
);
1483 if (child_task
->dependers
== NULL
)
1486 return gomp_task_run_post_handle_dependers (child_task
, team
);
1489 /* Remove CHILD_TASK from its parent. */
1492 gomp_task_run_post_remove_parent (struct gomp_task
*child_task
)
1494 struct gomp_task
*parent
= child_task
->parent
;
1498 /* If this was the last task the parent was depending on,
1499 synchronize with gomp_task_maybe_wait_for_dependencies so it can
1500 clean up and return. */
1501 if (__builtin_expect (child_task
->parent_depends_on
, 0)
1502 && --parent
->taskwait
->n_depend
== 0
1503 && parent
->taskwait
->in_depend_wait
)
1505 parent
->taskwait
->in_depend_wait
= false;
1506 gomp_sem_post (&parent
->taskwait
->taskwait_sem
);
1509 if (priority_queue_remove (PQ_CHILDREN
, &parent
->children_queue
,
1510 child_task
, MEMMODEL_RELEASE
)
1511 && parent
->taskwait
&& parent
->taskwait
->in_taskwait
)
1513 parent
->taskwait
->in_taskwait
= false;
1514 gomp_sem_post (&parent
->taskwait
->taskwait_sem
);
1516 child_task
->pnode
[PQ_CHILDREN
].next
= NULL
;
1517 child_task
->pnode
[PQ_CHILDREN
].prev
= NULL
;
1520 /* Remove CHILD_TASK from its taskgroup. */
1523 gomp_task_run_post_remove_taskgroup (struct gomp_task
*child_task
)
1525 struct gomp_taskgroup
*taskgroup
= child_task
->taskgroup
;
1526 if (taskgroup
== NULL
)
1528 bool empty
= priority_queue_remove (PQ_TASKGROUP
,
1529 &taskgroup
->taskgroup_queue
,
1530 child_task
, MEMMODEL_RELAXED
);
1531 child_task
->pnode
[PQ_TASKGROUP
].next
= NULL
;
1532 child_task
->pnode
[PQ_TASKGROUP
].prev
= NULL
;
1533 if (taskgroup
->num_children
> 1)
1534 --taskgroup
->num_children
;
1537 /* We access taskgroup->num_children in GOMP_taskgroup_end
1538 outside of the task lock mutex region, so
1539 need a release barrier here to ensure memory
1540 written by child_task->fn above is flushed
1541 before the NULL is written. */
1542 __atomic_store_n (&taskgroup
->num_children
, 0, MEMMODEL_RELEASE
);
1544 if (empty
&& taskgroup
->in_taskgroup_wait
)
1546 taskgroup
->in_taskgroup_wait
= false;
1547 gomp_sem_post (&taskgroup
->taskgroup_sem
);
/* Run queued tasks of the calling thread's team while it is inside a
   team barrier.  STATE is the barrier state; it is passed to
   gomp_barrier_last_thread and gomp_team_barrier_done.

   The function works on team->task_queue under team->task_lock, drops
   the lock around the execution of a task's function, and retakes it
   for the post-run bookkeeping.  TO_FREE holds the previously finished
   task so it can be passed to gomp_finish_task.
   NOTE(review): labels, loop headers and return statements are not
   shown in these lines -- confirm control flow against the full
   source.  */
1552 gomp_barrier_handle_tasks (gomp_barrier_state_t state
)
1554 struct gomp_thread
*thr
= gomp_thread ();
1555 struct gomp_team
*team
= thr
->ts
.team
;
1556 struct gomp_task
*task
= thr
->task
;
1557 struct gomp_task
*child_task
= NULL
;
1558 struct gomp_task
*to_free
= NULL
;
/* All queue and counter manipulation below happens with the team's
   task lock held.  */
1561 gomp_mutex_lock (&team
->task_lock
);
1562 if (gomp_barrier_last_thread (state
))
1564 if (team
->task_count
== 0)
1566 gomp_team_barrier_done (&team
->barrier
, state
);
1567 gomp_mutex_unlock (&team
->task_lock
);
1568 gomp_team_barrier_wake (&team
->barrier
, 0);
1571 gomp_team_barrier_set_waiting_for_tasks (&team
->barrier
);
1576 bool cancelled
= false;
1578 if (!priority_queue_empty_p (&team
->task_queue
, MEMMODEL_RELAXED
))
1582 = priority_queue_next_task (PQ_TEAM
, &team
->task_queue
,
1585 cancelled
= gomp_task_run_pre (child_task
, child_task
->parent
,
1587 if (__builtin_expect (cancelled
, 0))
1591 gomp_finish_task (to_free
);
1595 goto finish_cancelled
;
1597 team
->task_running_count
++;
1598 child_task
->in_tied_task
= true;
/* No task queued: if nothing is outstanding at all and the barrier is
   waiting for tasks, the barrier can be completed.  */
1600 else if (team
->task_count
== 0
1601 && gomp_team_barrier_waiting_for_tasks (&team
->barrier
))
1603 gomp_team_barrier_done (&team
->barrier
, state
);
1604 gomp_mutex_unlock (&team
->task_lock
);
1605 gomp_team_barrier_wake (&team
->barrier
, 0);
1608 gomp_finish_task (to_free
);
1613 gomp_mutex_unlock (&team
->task_lock
);
1616 gomp_team_barrier_wake (&team
->barrier
, do_wake
);
1621 gomp_finish_task (to_free
);
/* Make CHILD_TASK the thread's current task while it runs.  A NULL fn
   marks a task that is driven through gomp_target_task_fn instead.  */
1627 thr
->task
= child_task
;
1628 if (__builtin_expect (child_task
->fn
== NULL
, 0))
1630 if (gomp_target_task_fn (child_task
->fn_data
))
1633 gomp_mutex_lock (&team
->task_lock
);
1634 child_task
->kind
= GOMP_TASK_ASYNC_RUNNING
;
1635 team
->task_running_count
--;
1636 struct gomp_target_task
*ttask
1637 = (struct gomp_target_task
*) child_task
->fn_data
;
1638 /* If GOMP_PLUGIN_target_task_completion has run already
1639 in between gomp_target_task_fn and the mutex lock,
1640 perform the requeuing here. */
1641 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
1642 gomp_target_task_completion (team
, child_task
);
1644 ttask
->state
= GOMP_TARGET_TASK_RUNNING
;
1650 child_task
->fn (child_task
->fn_data
);
1655 gomp_mutex_lock (&team
->task_lock
);
/* A task with a detach_team set is parked as GOMP_TASK_DETACHED and
   counted in task_detach_count rather than being finished here.  */
1658 if (child_task
->detach_team
)
1660 assert (child_task
->detach_team
== team
);
1661 child_task
->kind
= GOMP_TASK_DETACHED
;
1662 ++team
->task_detach_count
;
1663 --team
->task_running_count
;
1665 "thread %d: task with event %p finished without "
1666 "completion event fulfilled in team barrier\n",
1667 thr
->ts
.team_id
, child_task
);
/* Post-run bookkeeping: resolve dependences, unlink the task from its
   parent and taskgroup, and remember it in TO_FREE.  */
1674 = gomp_task_run_post_handle_depend (child_task
, team
);
1675 gomp_task_run_post_remove_parent (child_task
);
1676 gomp_clear_parent (&child_task
->children_queue
);
1677 gomp_task_run_post_remove_taskgroup (child_task
);
1678 to_free
= child_task
;
1680 team
->task_running_count
--;
/* Wake at most NEW_TASKS threads, and no more than the number of team
   threads not currently counted as running a task.  */
1684 do_wake
= team
->nthreads
- team
->task_running_count
;
1685 if (do_wake
> new_tasks
)
1686 do_wake
= new_tasks
;
1693 /* Called when encountering a taskwait directive.
1695 Wait for all children of the current task. */
/* Implementation notes: the function first checks the current task's
   children_queue with acquire semantics and without holding the lock.
   It then takes team->task_lock and repeatedly either runs a
   GOMP_TASK_WAITING child itself or blocks on the semaphore of the
   stack-allocated TASKWAIT, which is published through task->taskwait.
   TO_FREE holds the previously finished child so it can be passed to
   gomp_finish_task.
   NOTE(review): loop headers, labels and return statements are not
   shown in these lines -- confirm control flow against the full
   source.  */
1698 GOMP_taskwait (void)
1700 struct gomp_thread
*thr
= gomp_thread ();
1701 struct gomp_team
*team
= thr
->ts
.team
;
1702 struct gomp_task
*task
= thr
->task
;
1703 struct gomp_task
*child_task
= NULL
;
1704 struct gomp_task
*to_free
= NULL
;
1705 struct gomp_taskwait taskwait
;
1708 /* The acquire barrier on load of task->children here synchronizes
1709 with the write of a NULL in gomp_task_run_post_remove_parent. It is
1710 not necessary that we synchronize with other non-NULL writes at
1711 this point, but we must ensure that all writes to memory by a
1712 child thread task work function are seen before we exit from
1715 || priority_queue_empty_p (&task
->children_queue
, MEMMODEL_ACQUIRE
))
1718 memset (&taskwait
, 0, sizeof (taskwait
));
1719 bool child_q
= false;
1720 gomp_mutex_lock (&team
->task_lock
);
1723 bool cancelled
= false;
1724 if (priority_queue_empty_p (&task
->children_queue
, MEMMODEL_RELAXED
))
1726 bool destroy_taskwait
= task
->taskwait
!= NULL
;
1727 task
->taskwait
= NULL
;
1728 gomp_mutex_unlock (&team
->task_lock
);
1731 gomp_finish_task (to_free
);
1734 if (destroy_taskwait
)
1735 gomp_sem_destroy (&taskwait
.taskwait_sem
);
1738 struct gomp_task
*next_task
1739 = priority_queue_next_task (PQ_CHILDREN
, &task
->children_queue
,
1740 PQ_TEAM
, &team
->task_queue
, &child_q
);
1741 if (next_task
->kind
== GOMP_TASK_WAITING
)
1743 child_task
= next_task
;
1745 = gomp_task_run_pre (child_task
, task
, team
);
1746 if (__builtin_expect (cancelled
, 0))
1750 gomp_finish_task (to_free
);
1754 goto finish_cancelled
;
1759 /* All tasks we are waiting for are either running in other
1760 threads, are detached and waiting for the completion event to be
1761 fulfilled, or they are tasks that have not had their
1762 dependencies met (so they're not even in the queue). Wait
1764 if (task
->taskwait
== NULL
)
1766 taskwait
.in_depend_wait
= false;
1767 gomp_sem_init (&taskwait
.taskwait_sem
, 0);
1768 task
->taskwait
= &taskwait
;
1770 taskwait
.in_taskwait
= true;
1772 gomp_mutex_unlock (&team
->task_lock
);
1775 gomp_team_barrier_wake (&team
->barrier
, do_wake
);
1780 gomp_finish_task (to_free
);
1786 thr
->task
= child_task
;
1787 if (__builtin_expect (child_task
->fn
== NULL
, 0))
1789 if (gomp_target_task_fn (child_task
->fn_data
))
1792 gomp_mutex_lock (&team
->task_lock
);
1793 child_task
->kind
= GOMP_TASK_ASYNC_RUNNING
;
1794 struct gomp_target_task
*ttask
1795 = (struct gomp_target_task
*) child_task
->fn_data
;
1796 /* If GOMP_PLUGIN_target_task_completion has run already
1797 in between gomp_target_task_fn and the mutex lock,
1798 perform the requeuing here. */
1799 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
1800 gomp_target_task_completion (team
, child_task
);
1802 ttask
->state
= GOMP_TARGET_TASK_RUNNING
;
1808 child_task
->fn (child_task
->fn_data
);
/* Nothing runnable: sleep until a finishing child posts the taskwait
   semaphore, then retake the task lock.  */
1812 gomp_sem_wait (&taskwait
.taskwait_sem
);
1813 gomp_mutex_lock (&team
->task_lock
);
/* A task with a detach_team set is parked as GOMP_TASK_DETACHED and
   counted in task_detach_count rather than being finished here.  */
1816 if (child_task
->detach_team
)
1818 assert (child_task
->detach_team
== team
);
1819 child_task
->kind
= GOMP_TASK_DETACHED
;
1820 ++team
->task_detach_count
;
1822 "thread %d: task with event %p finished without "
1823 "completion event fulfilled in taskwait\n",
1824 thr
->ts
.team_id
, child_task
);
1831 = gomp_task_run_post_handle_depend (child_task
, team
);
/* The child is removed from this task's children queue directly here,
   with relaxed ordering, instead of through
   gomp_task_run_post_remove_parent.  */
1835 priority_queue_remove (PQ_CHILDREN
, &task
->children_queue
,
1836 child_task
, MEMMODEL_RELAXED
);
1837 child_task
->pnode
[PQ_CHILDREN
].next
= NULL
;
1838 child_task
->pnode
[PQ_CHILDREN
].prev
= NULL
;
1841 gomp_clear_parent (&child_task
->children_queue
);
1843 gomp_task_run_post_remove_taskgroup (child_task
);
1845 to_free
= child_task
;
/* Wake at most NEW_TASKS threads; the available count is additionally
   reduced by one when the current task is not inside a tied task.  */
1850 do_wake
= team
->nthreads
- team
->task_running_count
1851 - !task
->in_tied_task
;
1852 if (do_wake
> new_tasks
)
1853 do_wake
= new_tasks
;
1859 /* Called when encountering a taskwait directive with depend clause(s).
1860 Wait as if it was a mergeable included task construct with empty body. */
1863 GOMP_taskwait_depend (void **depend
)
1865 struct gomp_thread
*thr
= gomp_thread ();
1866 struct gomp_team
*team
= thr
->ts
.team
;
1868 /* If parallel or taskgroup has been cancelled, return early. */
1869 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
1871 if (gomp_team_barrier_cancelled (&team
->barrier
))
1873 if (thr
->task
->taskgroup
)
1875 if (thr
->task
->taskgroup
->cancelled
)
1877 if (thr
->task
->taskgroup
->workshare
1878 && thr
->task
->taskgroup
->prev
1879 && thr
->task
->taskgroup
->prev
->cancelled
)
1884 if (thr
->task
&& thr
->task
->depend_hash
)
1885 gomp_task_maybe_wait_for_dependencies (depend
);
1888 /* Called when encountering a taskwait directive with nowait and depend
1889 clause(s). Create a possibly deferred task construct with empty body. */
1892 GOMP_taskwait_depend_nowait (void **depend
)
1894 ialias_call (GOMP_task
) (empty_task
, "", NULL
, 0, 1, true,
1895 GOMP_TASK_FLAG_DEPEND
, depend
, 0, NULL
);
1898 /* An undeferred task is about to run. Wait for all tasks that this
1899 undeferred task depends on.
1901 This is done by first putting all known ready dependencies
1902 (dependencies that have their own dependencies met) at the top of
1903 the scheduling queues. Then we iterate through these imminently
1904 ready tasks (and possibly other high priority tasks), and run them.
1905 If we run out of ready dependencies to execute, we either wait for
1906 the remaining dependencies to finish, or wait for them to get
1907 scheduled so we can run them.
1909 DEPEND is as in GOMP_task. */
/* Implementation notes: DEPEND[0] is read as the dependence count and
   DEPEND[1] as NOUT; an alternative layout reads DEPEND[2] through
   DEPEND[4] instead.  Every matching entry found in the current task's
   depend_hash gets parent_depends_on set, and tasks that are ready to
   run are moved forward with priority_queue_upgrade_task.  The number
   of awaited tasks is published through the stack-allocated TASKWAIT
   (n_depend), after which the function runs children or sleeps until
   that count reaches zero.
   NOTE(review): the condition selecting the alternative DEPEND layout,
   the loop headers, labels and several increments are not shown in
   these lines -- confirm against the full source.  */
1912 gomp_task_maybe_wait_for_dependencies (void **depend
)
1914 struct gomp_thread
*thr
= gomp_thread ();
1915 struct gomp_task
*task
= thr
->task
;
1916 struct gomp_team
*team
= thr
->ts
.team
;
1917 struct gomp_task_depend_entry elem
, *ent
= NULL
;
1918 struct gomp_taskwait taskwait
;
1919 size_t orig_ndepend
= (uintptr_t) depend
[0];
1920 size_t nout
= (uintptr_t) depend
[1];
1921 size_t ndepend
= orig_ndepend
;
1922 size_t normal
= ndepend
;
1925 size_t num_awaited
= 0;
1926 struct gomp_task
*child_task
= NULL
;
1927 struct gomp_task
*to_free
= NULL
;
1933 nout
= (uintptr_t) depend
[2] + (uintptr_t) depend
[3];
1934 normal
= nout
+ (uintptr_t) depend
[4];
/* The hash table walk and the queue upgrades below are done with the
   team's task lock held.  */
1937 gomp_mutex_lock (&team
->task_lock
);
1938 if (__builtin_expect (task
->depend_all_memory
&& ndepend
, false))
1940 struct gomp_task
*tsk
= task
->depend_all_memory
;
1941 if (!tsk
->parent_depends_on
)
1943 tsk
->parent_depends_on
= true;
1945 if (tsk
->num_dependees
== 0 && tsk
->kind
== GOMP_TASK_WAITING
)
1946 priority_queue_upgrade_task (tsk
, task
);
1949 for (i
= 0; i
< ndepend
; i
++)
1951 elem
.addr
= depend
[i
+ n
];
1952 elem
.is_in
= i
>= nout
;
/* Entries at index NORMAL and above are pointers to a descriptor
   whose second word holds the dependence type.  */
1953 if (__builtin_expect (i
>= normal
, 0))
1955 void **d
= (void **) elem
.addr
;
1956 switch ((uintptr_t) d
[1])
1958 case GOMP_DEPEND_IN
:
1960 case GOMP_DEPEND_OUT
:
1961 case GOMP_DEPEND_INOUT
:
1962 case GOMP_DEPEND_MUTEXINOUTSET
:
1965 case GOMP_DEPEND_INOUTSET
:
1969 gomp_fatal ("unknown omp_depend_t dependence type %d",
1970 (int) (uintptr_t) d
[1]);
1974 if (__builtin_expect (elem
.addr
== NULL
&& !elem
.is_in
, false))
1976 size_t size
= htab_size (task
->depend_hash
);
1977 if (htab_elements (task
->depend_hash
) * 8 < size
&& size
> 32)
1978 htab_expand (task
->depend_hash
);
1980 /* depend(inout: omp_all_memory) - depend on all previous
1981 sibling tasks that do have dependencies. Inlined
1983 hash_entry_type
*slot
= &task
->depend_hash
->entries
[0];
1984 hash_entry_type
*end
= slot
+ htab_size (task
->depend_hash
);
1985 for (; slot
!= end
; ++slot
)
1987 if (*slot
== HTAB_EMPTY_ENTRY
|| *slot
== HTAB_DELETED_ENTRY
)
1989 for (ent
= *slot
; ent
; ent
= ent
->next
)
1991 struct gomp_task
*tsk
= ent
->task
;
1992 if (!tsk
->parent_depends_on
)
1994 tsk
->parent_depends_on
= true;
1996 if (tsk
->num_dependees
== 0
1997 && tsk
->kind
== GOMP_TASK_WAITING
)
1998 priority_queue_upgrade_task (tsk
, task
);
2004 ent
= htab_find (task
->depend_hash
, &elem
);
2005 for (; ent
; ent
= ent
->next
)
2006 if (elem
.is_in
&& elem
.is_in
== ent
->is_in
)
2010 struct gomp_task
*tsk
= ent
->task
;
2011 if (!tsk
->parent_depends_on
)
2013 tsk
->parent_depends_on
= true;
2015 /* If dependency TSK itself has no dependencies and is
2016 ready to run, move it up front so that we run it as
2017 soon as possible. */
2018 if (tsk
->num_dependees
== 0 && tsk
->kind
== GOMP_TASK_WAITING
)
2019 priority_queue_upgrade_task (tsk
, task
);
2023 if (num_awaited
== 0)
2025 gomp_mutex_unlock (&team
->task_lock
);
/* Publish the wait state: TASKWAIT lives on this stack frame and is
   reachable by other threads through task->taskwait.  */
2029 memset (&taskwait
, 0, sizeof (taskwait
));
2030 taskwait
.n_depend
= num_awaited
;
2031 gomp_sem_init (&taskwait
.taskwait_sem
, 0);
2032 task
->taskwait
= &taskwait
;
2036 bool cancelled
= false;
2037 if (taskwait
.n_depend
== 0)
2039 task
->taskwait
= NULL
;
2040 gomp_mutex_unlock (&team
->task_lock
);
2043 gomp_finish_task (to_free
);
2046 gomp_sem_destroy (&taskwait
.taskwait_sem
);
2050 /* Theoretically when we have multiple priorities, we should
2051 choose between the highest priority item in
2052 task->children_queue and team->task_queue here, so we should
2053 use priority_queue_next_task(). However, since we are
2054 running an undeferred task, perhaps that makes all tasks it
2055 depends on undeferred, thus a priority of INF? This would
2056 make it unnecessary to take anything into account here,
2057 but the dependencies.
2059 On the other hand, if we want to use priority_queue_next_task(),
2060 care should be taken to only use priority_queue_remove()
2061 below if the task was actually removed from the children
2064 struct gomp_task
*next_task
2065 = priority_queue_next_task (PQ_CHILDREN
, &task
->children_queue
,
2066 PQ_IGNORED
, NULL
, &ignored
);
2068 if (next_task
->kind
== GOMP_TASK_WAITING
)
2070 child_task
= next_task
;
2072 = gomp_task_run_pre (child_task
, task
, team
);
2073 if (__builtin_expect (cancelled
, 0))
2077 gomp_finish_task (to_free
);
2081 goto finish_cancelled
;
2085 /* All tasks we are waiting for are either running in other
2086 threads, or they are tasks that have not had their
2087 dependencies met (so they're not even in the queue). Wait
2089 taskwait
.in_depend_wait
= true;
2090 gomp_mutex_unlock (&team
->task_lock
);
2093 gomp_team_barrier_wake (&team
->barrier
, do_wake
);
2098 gomp_finish_task (to_free
);
2104 thr
->task
= child_task
;
2105 if (__builtin_expect (child_task
->fn
== NULL
, 0))
2107 if (gomp_target_task_fn (child_task
->fn_data
))
2110 gomp_mutex_lock (&team
->task_lock
);
2111 child_task
->kind
= GOMP_TASK_ASYNC_RUNNING
;
2112 struct gomp_target_task
*ttask
2113 = (struct gomp_target_task
*) child_task
->fn_data
;
2114 /* If GOMP_PLUGIN_target_task_completion has run already
2115 in between gomp_target_task_fn and the mutex lock,
2116 perform the requeuing here. */
2117 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
2118 gomp_target_task_completion (team
, child_task
);
2120 ttask
->state
= GOMP_TARGET_TASK_RUNNING
;
2126 child_task
->fn (child_task
->fn_data
);
/* Nothing runnable: sleep until the taskwait semaphore is posted, then
   retake the task lock.  */
2130 gomp_sem_wait (&taskwait
.taskwait_sem
);
2131 gomp_mutex_lock (&team
->task_lock
);
2136 = gomp_task_run_post_handle_depend (child_task
, team
);
/* A finished child that the parent was waiting on reduces the
   outstanding dependence count directly.  */
2137 if (child_task
->parent_depends_on
)
2138 --taskwait
.n_depend
;
2140 priority_queue_remove (PQ_CHILDREN
, &task
->children_queue
,
2141 child_task
, MEMMODEL_RELAXED
);
2142 child_task
->pnode
[PQ_CHILDREN
].next
= NULL
;
2143 child_task
->pnode
[PQ_CHILDREN
].prev
= NULL
;
2145 gomp_clear_parent (&child_task
->children_queue
);
2146 gomp_task_run_post_remove_taskgroup (child_task
);
2147 to_free
= child_task
;
/* Wake at most NEW_TASKS threads; the available count is additionally
   reduced by one when the current task is not inside a tied task.  */
2152 do_wake
= team
->nthreads
- team
->task_running_count
2153 - !task
->in_tied_task
;
2154 if (do_wake
> new_tasks
)
2155 do_wake
= new_tasks
;
2161 /* Called when encountering a taskyield directive. */
void
GOMP_taskyield (void)
{
  /* Deliberately a no-op for now.  */
}
2169 static inline struct gomp_taskgroup
*
2170 gomp_taskgroup_init (struct gomp_taskgroup
*prev
)
2172 struct gomp_taskgroup
*taskgroup
2173 = gomp_malloc (sizeof (struct gomp_taskgroup
));
2174 taskgroup
->prev
= prev
;
2175 priority_queue_init (&taskgroup
->taskgroup_queue
);
2176 taskgroup
->reductions
= prev
? prev
->reductions
: NULL
;
2177 taskgroup
->in_taskgroup_wait
= false;
2178 taskgroup
->cancelled
= false;
2179 taskgroup
->workshare
= false;
2180 taskgroup
->num_children
= 0;
2181 gomp_sem_init (&taskgroup
->taskgroup_sem
, 0);
2186 GOMP_taskgroup_start (void)
2188 struct gomp_thread
*thr
= gomp_thread ();
2189 struct gomp_team
*team
= thr
->ts
.team
;
2190 struct gomp_task
*task
= thr
->task
;
2192 /* If team is NULL, all tasks are executed as
2193 GOMP_TASK_UNDEFERRED tasks and thus all children tasks of
2194 taskgroup and their descendant tasks will be finished
2195 by the time GOMP_taskgroup_end is called. */
2198 task
->taskgroup
= gomp_taskgroup_init (task
->taskgroup
);
/* Close the current task's innermost taskgroup.  The function waits
   until the taskgroup's num_children reaches zero, running queued
   tasks from the taskgroup queue (or, as visible below, from the
   task's children queue) while it waits and sleeping on
   taskgroup->taskgroup_sem when nothing is runnable.  On exit the
   task's taskgroup pointer is restored to the enclosing taskgroup and
   the semaphore is destroyed.
   NOTE(review): loop headers, labels, return statements and the final
   release of the taskgroup are not shown in these lines -- confirm
   against the full source.  */
2202 GOMP_taskgroup_end (void)
2204 struct gomp_thread
*thr
= gomp_thread ();
2205 struct gomp_team
*team
= thr
->ts
.team
;
2206 struct gomp_task
*task
= thr
->task
;
2207 struct gomp_taskgroup
*taskgroup
;
2208 struct gomp_task
*child_task
= NULL
;
2209 struct gomp_task
*to_free
= NULL
;
2214 taskgroup
= task
->taskgroup
;
2215 if (__builtin_expect (taskgroup
== NULL
, 0)
2216 && thr
->ts
.level
== 0)
2218 /* This can happen if GOMP_taskgroup_start is called when
2219 thr->ts.team == NULL, but inside of the taskgroup there
2220 is #pragma omp target nowait that creates an implicit
2221 team with a single thread. In this case, we want to wait
2222 for all outstanding tasks in this team. */
2223 gomp_team_barrier_wait (&team
->barrier
);
2227 /* The acquire barrier on load of taskgroup->num_children here
2228 synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
2229 It is not necessary that we synchronize with other non-0 writes at
2230 this point, but we must ensure that all writes to memory by a
2231 child thread task work function are seen before we exit from
2232 GOMP_taskgroup_end. */
2233 if (__atomic_load_n (&taskgroup
->num_children
, MEMMODEL_ACQUIRE
) == 0)
/* Children are still outstanding: the queues are inspected with the
   team's task lock held.  */
2237 gomp_mutex_lock (&team
->task_lock
);
2240 bool cancelled
= false;
2241 if (priority_queue_empty_p (&taskgroup
->taskgroup_queue
,
2244 if (taskgroup
->num_children
)
2246 if (priority_queue_empty_p (&task
->children_queue
,
2250 = priority_queue_next_task (PQ_CHILDREN
, &task
->children_queue
,
2251 PQ_TEAM
, &team
->task_queue
,
2256 gomp_mutex_unlock (&team
->task_lock
);
2259 gomp_finish_task (to_free
);
2267 = priority_queue_next_task (PQ_TASKGROUP
, &taskgroup
->taskgroup_queue
,
2268 PQ_TEAM
, &team
->task_queue
, &unused
);
2269 if (child_task
->kind
== GOMP_TASK_WAITING
)
2272 = gomp_task_run_pre (child_task
, child_task
->parent
, team
);
2273 if (__builtin_expect (cancelled
, 0))
2277 gomp_finish_task (to_free
);
2281 goto finish_cancelled
;
2288 /* All tasks we are waiting for are either running in other
2289 threads, or they are tasks that have not had their
2290 dependencies met (so they're not even in the queue). Wait
2292 taskgroup
->in_taskgroup_wait
= true;
2294 gomp_mutex_unlock (&team
->task_lock
);
2297 gomp_team_barrier_wake (&team
->barrier
, do_wake
);
2302 gomp_finish_task (to_free
);
2308 thr
->task
= child_task
;
2309 if (__builtin_expect (child_task
->fn
== NULL
, 0))
2311 if (gomp_target_task_fn (child_task
->fn_data
))
2314 gomp_mutex_lock (&team
->task_lock
);
2315 child_task
->kind
= GOMP_TASK_ASYNC_RUNNING
;
2316 struct gomp_target_task
*ttask
2317 = (struct gomp_target_task
*) child_task
->fn_data
;
2318 /* If GOMP_PLUGIN_target_task_completion has run already
2319 in between gomp_target_task_fn and the mutex lock,
2320 perform the requeuing here. */
2321 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
2322 gomp_target_task_completion (team
, child_task
);
2324 ttask
->state
= GOMP_TARGET_TASK_RUNNING
;
2330 child_task
->fn (child_task
->fn_data
);
/* Nothing runnable: sleep until the taskgroup semaphore is posted,
   then retake the task lock.  */
2334 gomp_sem_wait (&taskgroup
->taskgroup_sem
);
2335 gomp_mutex_lock (&team
->task_lock
);
/* A task with a detach_team set is parked as GOMP_TASK_DETACHED and
   counted in task_detach_count rather than being finished here.  */
2338 if (child_task
->detach_team
)
2340 assert (child_task
->detach_team
== team
);
2341 child_task
->kind
= GOMP_TASK_DETACHED
;
2342 ++team
->task_detach_count
;
2344 "thread %d: task with event %p finished without "
2345 "completion event fulfilled in taskgroup\n",
2346 thr
->ts
.team_id
, child_task
);
/* Post-run bookkeeping: resolve dependences, unlink the task from its
   parent and taskgroup, and remember it in TO_FREE.  */
2353 = gomp_task_run_post_handle_depend (child_task
, team
);
2354 gomp_task_run_post_remove_parent (child_task
);
2355 gomp_clear_parent (&child_task
->children_queue
);
2356 gomp_task_run_post_remove_taskgroup (child_task
);
2357 to_free
= child_task
;
/* Wake at most NEW_TASKS threads; the available count is additionally
   reduced by one when the current task is not inside a tied task.  */
2362 do_wake
= team
->nthreads
- team
->task_running_count
2363 - !task
->in_tied_task
;
2364 if (do_wake
> new_tasks
)
2365 do_wake
= new_tasks
;
/* Leave the taskgroup: the enclosing one becomes current again.  */
2371 task
->taskgroup
= taskgroup
->prev
;
2372 gomp_sem_destroy (&taskgroup
->taskgroup_sem
);
/* Register the task reduction descriptors starting at DATA.

   Descriptors form a chain linked through word 4 of each descriptor.
   For a descriptor D the code below uses: D[0] as the number of
   reduction entries, D[1] scaled by NTHREADS as the size and D[2] as
   the alignment of the block to allocate (D[2] is then overwritten
   with the block's address), D[4] as the link to the next descriptor
   and D[5] as the hash table.  Entries occupy three words each,
   starting at D + 7.

   OLD, when it carries a hash table in OLD[5], has that table's
   entries copied into the newly created table.  ORIG is non-NULL for
   worksharing task reductions, where the memory has been allocated
   already.
   NOTE(review): the tail of the parameter list, the loop headers and
   several assignments are not shown in these lines -- confirm the
   descriptor layout and the ORIG handling against the full source.  */
2376 static inline __attribute__((always_inline
)) void
2377 gomp_reduction_register (uintptr_t *data
, uintptr_t *old
, uintptr_t *orig
,
2380 size_t total_cnt
= 0;
2381 uintptr_t *d
= data
;
2382 struct htab
*old_htab
= NULL
, *new_htab
;
2385 if (__builtin_expect (orig
!= NULL
, 0))
2387 /* For worksharing task reductions, memory has been allocated
2388 already by some other thread that encountered the construct
2392 orig
= (uintptr_t *) orig
[4];
2396 size_t sz
= d
[1] * nthreads
;
2397 /* Should use omp_alloc if d[3] is not -1. */
2398 void *ptr
= gomp_aligned_alloc (d
[2], sz
);
2399 memset (ptr
, '\0', sz
);
2400 d
[2] = (uintptr_t) ptr
;
2407 d
[4] = (uintptr_t) old
;
2411 d
= (uintptr_t *) d
[4];
2416 old_htab
= (struct htab
*) old
[5];
2417 total_cnt
+= htab_elements (old_htab
);
/* The new table is sized for TOTAL_CNT elements, which includes the
   elements of the old table when there is one.  */
2419 new_htab
= htab_create (total_cnt
);
2422 /* Copy old hash table, like in htab_expand. */
2423 hash_entry_type
*p
, *olimit
;
2424 new_htab
->n_elements
= htab_elements (old_htab
);
2425 olimit
= old_htab
->entries
+ old_htab
->size
;
2426 p
= old_htab
->entries
;
2429 hash_entry_type x
= *p
;
2430 if (x
!= HTAB_EMPTY_ENTRY
&& x
!= HTAB_DELETED_ENTRY
)
2431 *find_empty_slot_for_expand (new_htab
, htab_hash (x
)) = x
;
/* Insert every entry of the descriptor into the new table; the third
   word of each entry is set to point back at its descriptor.  */
2440 for (j
= 0; j
< d
[0]; ++j
)
2442 uintptr_t *p
= d
+ 7 + j
* 3;
2443 p
[2] = (uintptr_t) d
;
2444 /* Ugly hack, hash_entry_type is defined for the task dependencies,
2445 which hash on the first element which is a pointer. We need
2446 to hash also on the first sizeof (uintptr_t) bytes which contain
2447 a pointer. Hide the cast from the compiler. */
2449 __asm ("" : "=g" (n
) : "0" (p
));
2450 *htab_find_slot (&new_htab
, n
, INSERT
) = n
;
2452 if (d
[4] == (uintptr_t) old
)
2455 d
= (uintptr_t *) d
[4];
/* Publish the new hash table in the descriptor.  */
2458 d
[5] = (uintptr_t) new_htab
;
/* Give the calling thread a team of its own: a team created with
   gomp_new_team (1) becomes thr->ts.team, the previous team state is
   saved in team->prev_ts, and the team's first implicit task is
   initialized and installed as thr->task.  NOTE(review): some lines of
   this function (conditions, braces, preprocessor terminators) are not
   visible in this listing.  */
2462 gomp_create_artificial_team (void)
2464 struct gomp_thread
*thr
= gomp_thread ();
2465 struct gomp_task_icv
*icv
;
2466 struct gomp_team
*team
= gomp_new_team (1);
2467 struct gomp_task
*task
= thr
->task
;
2468 struct gomp_task
**implicit_task
= &task
;
/* Take the ICVs from the current task when there is one, otherwise
   from the global ICVs.  */
2469 icv
= task
? &task
->icv
: &gomp_global_icv
;
/* Save the thread's current team state before overwriting it.  */
2470 team
->prev_ts
= thr
->ts
;
2471 thr
->ts
.team
= team
;
2472 thr
->ts
.team_id
= 0;
2473 thr
->ts
.work_share
= &team
->work_shares
[0];
2474 thr
->ts
.last_work_share
= NULL
;
2475 #ifdef HAVE_SYNC_BUILTINS
2476 thr
->ts
.single_count
= 0;
2478 thr
->ts
.static_trip
= 0;
2479 thr
->task
= &team
->implicit_task
[0];
2480 gomp_init_task (thr
->task
, NULL
, icv
);
/* Follow the parent links from the old current task until a task of
   kind GOMP_TASK_IMPLICIT (or the end of the chain) is reached, keeping
   the address of the link that refers to it.  */
2481 while (*implicit_task
2482 && (*implicit_task
)->kind
!= GOMP_TASK_IMPLICIT
)
2483 implicit_task
= &(*implicit_task
)->parent
;
2486 thr
->task
= *implicit_task
;
2488 free (*implicit_task
);
2489 thr
->task
= &team
->implicit_task
[0];
2491 #ifdef LIBGOMP_USE_PTHREADS
2493 pthread_setspecific (gomp_thread_destructor
, thr
);
/* If the implicit task was found through some task's parent link
   rather than being the current task itself, make that link refer to
   the new implicit task.  */
2495 if (implicit_task
!= &task
)
2497 *implicit_task
= thr
->task
;
2502 /* The format of data is:
2505 data[2] alignment (on output array pointer)
2506 data[3] allocator (-1 if malloc allocator)
2507 data[4] next pointer
2508 data[5] used internally (htab pointer)
2509 data[6] used internally (end of array)
2513 ent[2] used internally (pointer to data[0])
2514 The entries are sorted by increasing offset, so that a binary
2515 search can be performed. Normally, data[8] is 0, exception is
2516 for worksharing construct task reductions in cancellable parallel,
2517 where at offset 0 there should be space for a pointer and an integer
2518 which are used internally. */
/* Register the task reductions described by DATA with the taskgroup of
   the current task.  When the thread has no team, an artificial team is
   created and a taskgroup is started first.  The taskgroup's existing
   reductions list is passed on as the previously registered list, and
   DATA becomes the new head of that list.  */
2521 GOMP_taskgroup_reduction_register (uintptr_t *data
)
2523 struct gomp_thread
*thr
= gomp_thread ();
2524 struct gomp_team
*team
= thr
->ts
.team
;
2525 struct gomp_task
*task
;
2527 if (__builtin_expect (team
== NULL
, 0))
2529 /* The task reduction code needs a team and task, so for
2530 orphaned taskgroups just create the implicit team. */
2531 gomp_create_artificial_team ();
2532 ialias_call (GOMP_taskgroup_start
) ();
/* Reload the team, the call above has just installed one.  */
2533 team
= thr
->ts
.team
;
2535 nthreads
= team
->nthreads
;
/* No ORIG array is supplied (third argument NULL).  */
2537 gomp_reduction_register (data
, task
->taskgroup
->reductions
, NULL
, nthreads
);
2538 task
->taskgroup
->reductions
= data
;
/* Undo a registration of DATA: free the hash table stored in data[5],
   then release the memory whose address is kept in slot 2 of each array
   visited while following the slot 4 next pointers.
   NOTE(review): the loop construct and its termination condition are not
   visible in this listing -- confirm against the complete file.  */
2542 GOMP_taskgroup_reduction_unregister (uintptr_t *data
)
2544 uintptr_t *d
= data
;
2545 htab_free ((struct htab
*) data
[5]);
2548 gomp_aligned_free ((void *) d
[2]);
2549 d
= (uintptr_t *) d
[4];
2553 ialias (GOMP_taskgroup_reduction_unregister
)
2555 /* For i = 0 to cnt-1, remap ptrs[i] which is either address of the
2556 original list item or address of previously remapped original list
2557 item to address of the private copy, store that to ptrs[i].
2558 For i < cntorig, additionally set ptrs[cnt+i] to the address of
2559 the original list item. */
/* Remap the CNT pointers in PTRS to the calling thread's private copies.
   Each pointer is looked up in the hash table kept in data[5] of the
   current taskgroup's reductions; for a matching entry P belonging to
   array D the result is d[2] + team_id * d[1] + p[1].  The code further
   down also handles a pointer that already lies inside the allocated
   range of some array (from d[2] up to, but not including, d[6]): its
   offset is reduced modulo d[1] and rebased the same way, and a binary
   search over the entry offsets recovers the original address.  For
   indices below CNTORIG the original address is additionally stored in
   PTRS[CNT + index].  gomp_fatal is called when no matching reduction is
   found.
   NOTE(review): the branch structure joining the two lookup paths is
   only partly visible in this listing -- confirm against the complete
   file.  */
2562 GOMP_task_reduction_remap (size_t cnt
, size_t cntorig
, void **ptrs
)
2564 struct gomp_thread
*thr
= gomp_thread ();
2565 struct gomp_task
*task
= thr
->task
;
/* The team id selects this thread's chunk of the allocation.  */
2566 unsigned id
= thr
->ts
.team_id
;
2567 uintptr_t *data
= task
->taskgroup
->reductions
;
2569 struct htab
*reduction_htab
= (struct htab
*) data
[5];
2571 for (i
= 0; i
< cnt
; ++i
)
2573 hash_entry_type ent
, n
;
/* Present the address of ptrs[i] as a hash entry without a visible
   cast, so that the table hashes on the pointer stored there.  */
2574 __asm ("" : "=g" (ent
) : "0" (ptrs
+ i
));
2575 n
= htab_find (reduction_htab
, ent
);
2579 __asm ("" : "=g" (p
) : "0" (n
));
2580 /* At this point, p[0] should be equal to (uintptr_t) ptrs[i],
2581 p[1] is the offset within the allocated chunk for each
2582 thread, p[2] is the array registered with
2583 GOMP_taskgroup_reduction_register, d[2] is the base of the
2584 allocated memory and d[1] is the size of the allocated chunk
2586 d
= (uintptr_t *) p
[2];
2587 ptrs
[i
] = (void *) (d
[2] + id
* d
[1] + p
[1]);
2588 if (__builtin_expect (i
< cntorig
, 0))
2589 ptrs
[cnt
+ i
] = (void *) p
[0];
2595 if ((uintptr_t) ptrs
[i
] >= d
[2] && (uintptr_t) ptrs
[i
] < d
[6])
2597 d
= (uintptr_t *) d
[4];
2600 gomp_fatal ("couldn't find matching task_reduction or reduction with "
2601 "task modifier for %p", ptrs
[i
]);
2602 uintptr_t off
= ((uintptr_t) ptrs
[i
] - d
[2]) % d
[1];
2603 ptrs
[i
] = (void *) (d
[2] + id
* d
[1] + off
);
2604 if (__builtin_expect (i
< cntorig
, 0))
2606 size_t lo
= 0, hi
= d
[0] - 1;
2609 size_t m
= (lo
+ hi
) / 2;
2610 if (d
[7 + 3 * m
+ 1] < off
)
2612 else if (d
[7 + 3 * m
+ 1] == off
)
2614 ptrs
[cnt
+ i
] = (void *) d
[7 + 3 * m
];
2621 gomp_fatal ("couldn't find matching task_reduction or reduction "
2622 "with task modifier for %p", ptrs
[i
]);
/* Set up task reductions for a parallel region: obtain a taskgroup from
   gomp_taskgroup_init (NULL), register DATA for NTHREADS threads with no
   previously registered list and no ORIG array, and record DATA as that
   taskgroup's reductions.  The declared return type is a pointer to
   struct gomp_taskgroup.  */
2627 struct gomp_taskgroup
*
2628 gomp_parallel_reduction_register (uintptr_t *data
, unsigned nthreads
)
2630 struct gomp_taskgroup
*taskgroup
= gomp_taskgroup_init (NULL
);
2631 gomp_reduction_register (data
, NULL
, NULL
, nthreads
);
2632 taskgroup
->reductions
= data
;
/* Register the task reductions DATA of a worksharing construct with the
   current task's taskgroup.  ORIG is forwarded to
   gomp_reduction_register together with the taskgroup's existing
   reductions list and the team's thread count; afterwards DATA becomes
   the head of the taskgroup's reductions.  */
2637 gomp_workshare_task_reduction_register (uintptr_t *data
, uintptr_t *orig
)
2639 struct gomp_thread
*thr
= gomp_thread ();
2640 struct gomp_team
*team
= thr
->ts
.team
;
2641 struct gomp_task
*task
= thr
->task
;
2642 unsigned nthreads
= team
->nthreads
;
2643 gomp_reduction_register (data
, task
->taskgroup
->reductions
, orig
, nthreads
);
2644 task
->taskgroup
->reductions
= data
;
/* Start a taskgroup on behalf of a worksharing construct: a new
   taskgroup is initialized from the task's current one, installed as
   task->taskgroup and flagged with workshare = true.  An artificial team
   is created on one path before that.
   NOTE(review): the condition guarding the artificial team creation is
   not visible in this listing; presumably it is a missing-team check --
   confirm against the complete file.  */
2648 gomp_workshare_taskgroup_start (void)
2650 struct gomp_thread
*thr
= gomp_thread ();
2651 struct gomp_team
*team
= thr
->ts
.team
;
2652 struct gomp_task
*task
;
2656 gomp_create_artificial_team ();
2657 team
= thr
->ts
.team
;
2660 task
->taskgroup
= gomp_taskgroup_init (task
->taskgroup
);
2661 task
->taskgroup
->workshare
= true;
/* Finish the task reductions of a worksharing construct.  The reductions
   pointer is read before the taskgroup is ended.  The thread with team id
   0 then performs the full GOMP_taskgroup_reduction_unregister; the code
   below that frees only the hash table in data[5].  Finally the team
   barrier is waited on.
   NOTE(review): the else branch and the use of CANCELLED are not visible
   in this listing -- confirm against the complete file.  */
2665 GOMP_workshare_task_reduction_unregister (bool cancelled
)
2667 struct gomp_thread
*thr
= gomp_thread ();
2668 struct gomp_task
*task
= thr
->task
;
2669 struct gomp_team
*team
= thr
->ts
.team
;
/* Fetch the reductions before GOMP_taskgroup_end is called.  */
2670 uintptr_t *data
= task
->taskgroup
->reductions
;
2671 ialias_call (GOMP_taskgroup_end
) ();
2672 if (thr
->ts
.team_id
== 0)
2673 ialias_call (GOMP_taskgroup_reduction_unregister
) (data
);
2675 htab_free ((struct htab
*) data
[5]);
2678 gomp_team_barrier_wait (&team
->barrier
);
/* Body of omp_in_final: the result is true when the calling thread has a
   current task and that task's final_task flag is set.  */
2684 struct gomp_thread
*thr
= gomp_thread ();
2685 return thr
->task
&& thr
->task
->final_task
;
2688 ialias (omp_in_final
)
/* Return true when the calling thread has a current task whose kind is
   something other than GOMP_TASK_IMPLICIT.  */
2691 omp_in_explicit_task (void)
2693 struct gomp_thread
*thr
= gomp_thread ();
2694 struct gomp_task
*task
= thr
->task
;
2695 return task
&& task
->kind
!= GOMP_TASK_IMPLICIT
;
2698 ialias (omp_in_explicit_task
)
/* Fulfill the completion event EVENT, which is the address of the
   struct gomp_task it belongs to.

   For a task that is not deferred the completion semaphore is posted
   (fulfilling twice is a fatal error).  Otherwise the team is read from
   task->detach_team and its task_lock is taken.  If the task is not yet
   of kind GOMP_TASK_DETACHED it has not finished running, so only
   detach_team is cleared.  If it has finished, the post-run bookkeeping
   is performed here (dependencies, parent, children queue, taskgroup),
   task_detach_count is decremented, threads are woken for any newly
   runnable tasks and the task is finished with gomp_finish_task.
   NOTE(review): early returns, some conditions and some arguments are not
   visible in this listing -- confirm against the complete file.  */
2701 omp_fulfill_event (omp_event_handle_t event
)
2703 struct gomp_task
*task
= (struct gomp_task
*) event
;
2704 if (!task
->deferred_p
)
/* A positive semaphore count is treated as an already fulfilled
   event.  */
2706 if (gomp_sem_getcount (task
->completion_sem
) > 0)
2707 gomp_fatal ("omp_fulfill_event: %p event already fulfilled!\n", task
);
2709 gomp_debug (0, "omp_fulfill_event: %p event for undeferred task\n",
2711 gomp_sem_post (task
->completion_sem
);
2715 struct gomp_team
*team
= __atomic_load_n (&task
->detach_team
,
2718 gomp_fatal ("omp_fulfill_event: %p event is invalid or has already "
2719 "been fulfilled!\n", task
);
2721 gomp_mutex_lock (&team
->task_lock
);
2722 if (task
->kind
!= GOMP_TASK_DETACHED
)
2724 /* The task has not finished running yet. */
2726 "omp_fulfill_event: %p event fulfilled for unfinished "
/* Clearing detach_team records that the event has been fulfilled.  */
2728 __atomic_store_n (&task
->detach_team
, NULL
, MEMMODEL_RELAXED
);
2729 gomp_mutex_unlock (&team
->task_lock
);
2733 gomp_debug (0, "omp_fulfill_event: %p event fulfilled for finished task\n",
/* The task already finished running, so do its post-run bookkeeping
   now; NEW_TASKS receives the value returned by the dependency
   handling.  */
2735 size_t new_tasks
= gomp_task_run_post_handle_depend (task
, team
);
2736 gomp_task_run_post_remove_parent (task
);
2737 gomp_clear_parent (&task
->children_queue
);
2738 gomp_task_run_post_remove_taskgroup (task
);
2740 team
->task_detach_count
--;
/* True when the calling thread belongs to the team that owns the
   task.  */
2743 bool shackled_thread_p
= team
== gomp_thread ()->ts
.team
;
2746 /* Wake up threads to run new tasks. */
2747 gomp_team_barrier_set_task_pending (&team
->barrier
);
/* Wake at most NEW_TASKS threads, and no more than the number of
   threads that are not currently running a task.  */
2748 do_wake
= team
->nthreads
- team
->task_running_count
;
2749 if (do_wake
> new_tasks
)
2750 do_wake
= new_tasks
;
2753 if (!shackled_thread_p
2755 && team
->task_detach_count
== 0
2756 && gomp_team_barrier_waiting_for_tasks (&team
->barrier
))
2757 /* Ensure that at least one thread is woken up to signal that the
2758 barrier can finish. */
2761 /* If we are running in an unshackled thread, the team might vanish before
2762 gomp_team_barrier_wake is run if we release the lock first, so keep the
2763 lock for the call in that case. */
2764 if (shackled_thread_p
)
2765 gomp_mutex_unlock (&team
->task_lock
);
2767 gomp_team_barrier_wake (&team
->barrier
, do_wake
);
2768 if (!shackled_thread_p
)
2769 gomp_mutex_unlock (&team
->task_lock
);
2771 gomp_finish_task (task
);
2775 ialias (omp_fulfill_event
)