/* Copyright (C) 2007-2015 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file handles the maintenance of tasks in response to task
   creation and termination.  */
#include "libgomp.h"
#include <stdlib.h>
#include <string.h>
#include "gomp-constants.h"

typedef struct gomp_task_depend_entry *hash_entry_type;
static inline void *
htab_alloc (size_t size)
{
  return gomp_malloc (size);
}

static inline void
htab_free (void *ptr)
{
  free (ptr);
}

#include "hashtab.h"

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer (element->addr);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x->addr == y->addr;
}
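
/* The dependency hash table is keyed purely on the address named in a
   depend clause: htab_hash and htab_eq above look only at ADDR, so
   depend(out: x) and depend(in: x) from different sibling tasks share
   a single chain, which is how a later task finds every earlier
   sibling that touched x.  */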
/* Create a new task data structure.  */

void
gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
                struct gomp_task_icv *prev_icv)
{
  /* It would seem that using memset here would be a win, but it turns
     out that partially filling gomp_task allows us to keep the
     overhead of task creation low.  In the nqueens-1.c test, for a
     sufficiently large N, we drop the overhead from 5-6% to 1%.

     Note, the nqueens-1.c test in serial mode is a good test to
     benchmark the overhead of creating tasks as there are millions of
     tiny tasks created that all run undeferred.  */
  task->parent = parent_task;
  task->icv = *prev_icv;
  task->kind = GOMP_TASK_IMPLICIT;
  task->taskwait = NULL;
  task->in_tied_task = false;
  task->final_task = false;
  task->copy_ctors_done = false;
  task->parent_depends_on = false;
  priority_queue_init (&task->children_queue);
  task->taskgroup = NULL;
  task->dependers = NULL;
  task->depend_hash = NULL;
  task->depend_count = 0;
}

/* Clean up a task, after completing it.  */

void
gomp_end_task (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;

  gomp_finish_task (task);
  thr->task = task->parent;
}

/* Clear the parent field of every task in LIST.  */

static inline void
gomp_clear_parent_in_list (struct priority_list *list)
{
  struct priority_node *p = list->tasks;
  if (p)
    do
      {
        priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
        p = p->next;
      }
    while (p != list->tasks);
}

/* Splay tree version of gomp_clear_parent_in_list.

   Clear the parent field of every task in NODE within SP, and free
   the node when done.  */

static void
gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
{
  if (!node)
    return;
  prio_splay_tree_node left = node->left, right = node->right;
  gomp_clear_parent_in_list (&node->key.l);
#if _LIBGOMP_CHECKING_
  memset (node, 0xaf, sizeof (*node));
#endif
  /* No need to remove the node from the tree.  We're nuking
     everything, so just free the nodes and our caller can clear the
     entire splay tree.  */
  free (node);
  gomp_clear_parent_in_tree (sp, left);
  gomp_clear_parent_in_tree (sp, right);
}

/* Clear the parent field of every task in Q and remove every task
   from Q.  */

static inline void
gomp_clear_parent (struct priority_queue *q)
{
  if (priority_queue_multi_p (q))
    {
      gomp_clear_parent_in_tree (&q->t, q->t.root);
      /* All the nodes have been cleared in gomp_clear_parent_in_tree.
         No need to remove anything.  We can just nuke everything.  */
      q->t.root = NULL;
    }
  else
    gomp_clear_parent_in_list (&q->l);
}
/* Helper function for GOMP_task and gomp_create_target_task.

   For a TASK with in/out dependencies, fill in the various dependency
   queues.  PARENT is the parent of said task.  DEPEND is as in
   GOMP_task.  */

static void
gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
                         void **depend)
{
  size_t ndepend = (uintptr_t) depend[0];
  size_t nout = (uintptr_t) depend[1];
  size_t i;
  hash_entry_type ent;

  task->depend_count = ndepend;
  task->num_dependees = 0;
  if (parent->depend_hash == NULL)
    parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
  for (i = 0; i < ndepend; i++)
    {
      task->depend[i].addr = depend[2 + i];
      task->depend[i].next = NULL;
      task->depend[i].prev = NULL;
      task->depend[i].task = task;
      task->depend[i].is_in = i >= nout;
      task->depend[i].redundant = false;
      task->depend[i].redundant_out = false;

      hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
                                              &task->depend[i], INSERT);
      hash_entry_type out = NULL, last = NULL;
      if (*slot)
        {
          /* If multiple depends on the same task are the same, all but the
             first one are redundant.  As inout/out come first, if any of them
             is inout/out, it will win, which is the right semantics.  */
          if ((*slot)->task == task)
            {
              task->depend[i].redundant = true;
              continue;
            }
          for (ent = *slot; ent; ent = ent->next)
            {
              if (ent->redundant_out)
                break;

              last = ent;

              /* depend(in:...) doesn't depend on earlier depend(in:...).  */
              if (i >= nout && ent->is_in)
                continue;

              if (!ent->is_in)
                out = ent;

              struct gomp_task *tsk = ent->task;
              if (tsk->dependers == NULL)
                {
                  tsk->dependers
                    = gomp_malloc (sizeof (struct gomp_dependers_vec)
                                   + 6 * sizeof (struct gomp_task *));
                  tsk->dependers->n_elem = 1;
                  tsk->dependers->allocated = 6;
                  tsk->dependers->elem[0] = task;
                  task->num_dependees++;
                  continue;
                }
              /* We already have some other dependency on tsk from earlier
                 depend clause.  */
              else if (tsk->dependers->n_elem
                       && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
                           == task))
                continue;
              else if (tsk->dependers->n_elem == tsk->dependers->allocated)
                {
                  tsk->dependers->allocated
                    = tsk->dependers->allocated * 2 + 2;
                  tsk->dependers
                    = gomp_realloc (tsk->dependers,
                                    sizeof (struct gomp_dependers_vec)
                                    + (tsk->dependers->allocated
                                       * sizeof (struct gomp_task *)));
                }
              tsk->dependers->elem[tsk->dependers->n_elem++] = task;
              task->num_dependees++;
            }
          task->depend[i].next = *slot;
          (*slot)->prev = &task->depend[i];
        }
      *slot = &task->depend[i];

      /* There is no need to store more than one depend({,in}out:) task per
         address in the hash table chain for the purpose of creation of
         deferred tasks, because each out depends on all earlier outs, thus it
         is enough to record just the last depend({,in}out:).  For depend(in:),
         we need to keep all of the previous ones not terminated yet, because
         a later depend({,in}out:) might need to depend on all of them.  So, if
         the new task's clause is depend({,in}out:), we know there is at most
         one other depend({,in}out:) clause in the list (out).  For
         non-deferred tasks we want to see all outs, so they are moved to the
         end of the chain, after first redundant_out entry all following
         entries should be redundant_out.  */
      if (!task->depend[i].is_in && out)
        {
          if (out != last)
            {
              out->next->prev = out->prev;
              out->prev->next = out->next;
              out->next = last->next;
              out->prev = last;
              last->next = out;
              if (out->next)
                out->next->prev = out;
            }
          out->redundant_out = true;
        }
    }
}
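
/* A worked example of the bookkeeping above (illustrative only):
   suppose sibling tasks are deferred in the order T1 depend(out: x),
   T2 depend(in: x), T3 depend(in: x), T4 depend(inout: x).  T2 and T3
   each record a dependence on T1; T4 records dependences on T1, T2 and
   T3.  The hash chain for &x then reads T4 -> T3 -> T2 -> T1, with T1,
   the previous out, left at the end and marked redundant_out.  */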
/* Called when encountering an explicit task directive.  If IF_CLAUSE is
   false, then we must not delay in executing the task.  If UNTIED is true,
   then the task may be executed by any member of the team.

   DEPEND is an array containing:
     depend[0]: number of depend elements.
     depend[1]: number of depend elements of type "out".
     depend[2..N+1]: address of [1..N]th depend element.  */
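
/* For example (an illustrative sketch, not taken from these sources),
   a directive such as

       #pragma omp task depend(out: x) depend(in: y, z)

   would be lowered to pass N = 3 depend elements:

       depend[0] = (void *) (uintptr_t) 3;    -- total element count
       depend[1] = (void *) (uintptr_t) 1;    -- leading out/inout count
       depend[2] = &x;                        -- out/inout addresses first
       depend[3] = &y;                        -- followed by in addresses
       depend[4] = &z;  */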
void
GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
           long arg_size, long arg_align, bool if_clause, unsigned flags,
           void **depend, int priority)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
     might be running on a different thread than FN.  */
  if (cpyfn)
    if_clause = false;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
          || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0)
    priority = 0;
  else if (priority > gomp_max_task_priority_var)
    priority = gomp_max_task_priority_var;

  if (!if_clause || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count > 64 * team->nthreads)
    {
      struct gomp_task task;

      /* If there are depend clauses and earlier deferred sibling tasks
         with depend clauses, check if there isn't a dependency.  If there
         is, we need to wait for them.  There is no need to handle
         depend clauses for non-deferred tasks other than this, because
         the parent task is suspended until the child task finishes and thus
         it can't start further child tasks.  */
      if ((flags & GOMP_TASK_FLAG_DEPEND)
          && thr->task && thr->task->depend_hash)
        gomp_task_maybe_wait_for_dependencies (depend);

      gomp_init_task (&task, thr->task, gomp_icv (false));
      task.kind = GOMP_TASK_UNDEFERRED;
      task.final_task = (thr->task && thr->task->final_task)
                        || (flags & GOMP_TASK_FLAG_FINAL);
      task.priority = priority;
      if (thr->task)
        {
          task.in_tied_task = thr->task->in_tied_task;
          task.taskgroup = thr->task->taskgroup;
        }
      thr->task = &task;
      if (__builtin_expect (cpyfn != NULL, 0))
        {
          char buf[arg_size + arg_align - 1];
          char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
                                & ~(uintptr_t) (arg_align - 1));
          cpyfn (arg, data);
          fn (arg);
        }
      else
        fn (data);
      /* Access to "children" is normally done inside a task_lock
         mutex region, but the only way this particular task.children
         can be set is if this thread's task work function (fn)
         creates children.  So since the setter is *this* thread, we
         need no barriers here when testing for non-NULL.  We can have
         task.children set by the current thread then changed by a
         child thread, but seeing a stale non-NULL value is not a
         problem.  Once past the task_lock acquisition, this thread
         will see the real value of task.children.  */
      if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
        {
          gomp_mutex_lock (&team->task_lock);
          gomp_clear_parent (&task.children_queue);
          gomp_mutex_unlock (&team->task_lock);
        }
      gomp_end_task ();
    }
  else
    {
      struct gomp_task *task;
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      bool do_wake;
      size_t depend_size = 0;

      if (flags & GOMP_TASK_FLAG_DEPEND)
        depend_size = ((uintptr_t) depend[0]
                       * sizeof (struct gomp_task_depend_entry));
      task = gomp_malloc (sizeof (*task) + depend_size
                          + arg_size + arg_align - 1);
      arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
                      & ~(uintptr_t) (arg_align - 1));
      gomp_init_task (task, parent, gomp_icv (false));
      task->priority = priority;
      task->kind = GOMP_TASK_UNDEFERRED;
      task->in_tied_task = parent->in_tied_task;
      task->taskgroup = taskgroup;
      thr->task = task;
      if (cpyfn)
        {
          cpyfn (arg, data);
          task->copy_ctors_done = true;
        }
      else
        memcpy (arg, data, arg_size);
      thr->task = parent;
      task->kind = GOMP_TASK_WAITING;
      task->fn = fn;
      task->fn_data = arg;
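      /* GOMP_TASK_FLAG_FINAL is bit 1 (value 2) in gomp-constants.h,
         so the shift below reduces the flag to a plain 0/1 value.  */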
      task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
         tasks.  */
      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
                             || (taskgroup && taskgroup->cancelled))
                            && !task->copy_ctors_done, 0))
        {
          gomp_mutex_unlock (&team->task_lock);
          gomp_finish_task (task);
          free (task);
          return;
        }
      if (taskgroup)
        taskgroup->num_children++;
      if (depend_size)
        {
          gomp_task_handle_depend (task, parent, depend);
          if (task->num_dependees)
            {
              /* Tasks that depend on other tasks are not put into the
                 various waiting queues, so we are done for now.  Said
                 tasks are instead put into the queues via
                 gomp_task_run_post_handle_dependers() after their
                 dependencies have been satisfied.  After which, they
                 can be picked up by the various scheduling
                 points.  */
              gomp_mutex_unlock (&team->task_lock);
              return;
            }
        }

      priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
                             task, priority,
                             PRIORITY_INSERT_BEGIN,
                             /*adjust_parent_depends_on=*/false,
                             task->parent_depends_on);
      if (taskgroup)
        priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
                               task, priority,
                               PRIORITY_INSERT_BEGIN,
                               /*adjust_parent_depends_on=*/false,
                               task->parent_depends_on);

      priority_queue_insert (PQ_TEAM, &team->task_queue,
                             task, priority,
                             PRIORITY_INSERT_END,
                             /*adjust_parent_depends_on=*/false,
                             task->parent_depends_on);

      ++team->task_count;
      ++team->task_queued_count;
      gomp_team_barrier_set_task_pending (&team->barrier);
      do_wake = team->task_running_count + !parent->in_tied_task
                < team->nthreads;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
        gomp_team_barrier_wake (&team->barrier, 1);
    }
}
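
/* As a usage sketch (hypothetical lowering, not taken from these
   sources): for

       #pragma omp task firstprivate (n)
       f (n);

   a compiler outlines the task body into a helper function and emits
   roughly

       GOMP_task (helper_fn, &data, NULL, sizeof (data),
                  __alignof__ (data), true, 0, NULL, 0);

   where DATA carries the firstprivate copy of n and HELPER_FN unpacks
   it and calls f.  */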
ialias (GOMP_taskgroup_start)
ialias (GOMP_taskgroup_end)

#define TYPE long
#define UTYPE unsigned long
#define TYPE_is_long 1
#include "taskloop.c"
#undef TYPE
#undef UTYPE
#undef TYPE_is_long

#define TYPE unsigned long long
#define UTYPE TYPE
#define GOMP_taskloop GOMP_taskloop_ull
#include "taskloop.c"
#undef TYPE
#undef UTYPE
#undef GOMP_taskloop
static void inline
priority_queue_move_task_first (enum priority_queue_type type,
                                struct priority_queue *head,
                                struct gomp_task *task)
{
#if _LIBGOMP_CHECKING_
  if (!priority_queue_task_in_queue_p (type, head, task))
    gomp_fatal ("Attempt to move first missing task %p", task);
#endif
  struct priority_list *list;
  if (priority_queue_multi_p (head))
    {
      list = priority_queue_lookup_priority (head, task->priority);
#if _LIBGOMP_CHECKING_
      if (!list)
        gomp_fatal ("Unable to find priority %d", task->priority);
#endif
    }
  else
    list = &head->l;
  priority_list_remove (list, task_to_priority_node (type, task), 0);
  priority_list_insert (type, list, task, task->priority,
                        PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
                        task->parent_depends_on);
}
/* Actual body of GOMP_PLUGIN_target_task_completion that is executed
   with team->task_lock held, or is executed in the thread that called
   gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
   run before it acquires team->task_lock.  */

static void
gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
{
  struct gomp_task *parent = task->parent;
  if (parent)
    priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
                                    task);

  struct gomp_taskgroup *taskgroup = task->taskgroup;
  if (taskgroup)
    priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
                                    task);

  priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
                         PRIORITY_INSERT_BEGIN, false,
                         task->parent_depends_on);
  task->kind = GOMP_TASK_WAITING;
  if (parent && parent->taskwait)
    {
      if (parent->taskwait->in_taskwait)
        {
          /* One more task has had its dependencies met.
             Inform any waiters.  */
          parent->taskwait->in_taskwait = false;
          gomp_sem_post (&parent->taskwait->taskwait_sem);
        }
      else if (parent->taskwait->in_depend_wait)
        {
          /* One more task has had its dependencies met.
             Inform any waiters.  */
          parent->taskwait->in_depend_wait = false;
          gomp_sem_post (&parent->taskwait->taskwait_sem);
        }
    }
  if (taskgroup && taskgroup->in_taskgroup_wait)
    {
      /* One more task has had its dependencies met.
         Inform any waiters.  */
      taskgroup->in_taskgroup_wait = false;
      gomp_sem_post (&taskgroup->taskgroup_sem);
    }

  ++team->task_queued_count;
  gomp_team_barrier_set_task_pending (&team->barrier);
  /* I'm afraid this can't be done after releasing team->task_lock,
     as gomp_target_task_completion is run from an unrelated thread and
     therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
     the team could be gone already.  */
  if (team->nthreads > team->task_running_count)
    gomp_team_barrier_wake (&team->barrier, 1);
}
/* Signal that a target task TTASK has completed the asynchronously
   running phase and should be requeued as a task to handle the
   variable unmapping.  */

void
GOMP_PLUGIN_target_task_completion (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_task *task = ttask->task;
  struct gomp_team *team = ttask->team;

  gomp_mutex_lock (&team->task_lock);
  if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN)
    {
      ttask->state = GOMP_TARGET_TASK_FINISHED;
      gomp_mutex_unlock (&team->task_lock);
      return;
    }
  ttask->state = GOMP_TARGET_TASK_FINISHED;
  gomp_target_task_completion (team, task);
  gomp_mutex_unlock (&team->task_lock);
}
static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);

/* Called for nowait target tasks.  */

bool
gomp_create_target_task (struct gomp_device_descr *devicep,
                         void (*fn) (void *), size_t mapnum, void **hostaddrs,
                         size_t *sizes, unsigned short *kinds,
                         unsigned int flags, void **depend,
                         enum gomp_target_task_state state)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
          || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return true;

  struct gomp_target_task *ttask;
  struct gomp_task *task;
  struct gomp_task *parent = thr->task;
  struct gomp_taskgroup *taskgroup = parent->taskgroup;
  bool do_wake;
  size_t depend_size = 0;
  uintptr_t depend_cnt = 0;
  size_t tgt_align = 0, tgt_size = 0;

  if (depend != NULL)
    {
      depend_cnt = (uintptr_t) depend[0];
      depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
    }
  if (fn)
    {
      /* GOMP_MAP_FIRSTPRIVATE mappings need to be copied first, as they
         are firstprivate on the target task.  */
      size_t i;
      for (i = 0; i < mapnum; i++)
        if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
          {
            size_t align = (size_t) 1 << (kinds[i] >> 8);
            if (tgt_align < align)
              tgt_align = align;
            tgt_size = (tgt_size + align - 1) & ~(align - 1);
            tgt_size += sizes[i];
          }
      if (tgt_align)
        tgt_size += tgt_align - 1;
      else
        tgt_size = 0;
    }
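  /* A worked example of the size computation above (illustrative
     numbers only): two GOMP_MAP_FIRSTPRIVATE entries with sizes 4 and 8
     and alignments 4 and 8 yield tgt_align = 8 and tgt_size = 16 (4,
     rounded up to 8, plus 8), plus tgt_align - 1 = 7 bytes of slack for
     aligning the start of the block: 23 bytes in total.  */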
  task = gomp_malloc (sizeof (*task) + depend_size
                      + sizeof (*ttask)
                      + mapnum * (sizeof (void *) + sizeof (size_t)
                                  + sizeof (unsigned short))
                      + tgt_size);
  gomp_init_task (task, parent, gomp_icv (false));
  task->priority = 0;
  task->kind = GOMP_TASK_WAITING;
  task->in_tied_task = parent->in_tied_task;
  task->taskgroup = taskgroup;
  ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
  ttask->devicep = devicep;
  ttask->fn = fn;
  ttask->mapnum = mapnum;
  memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
  ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
  memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
  ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
  memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
  if (tgt_align)
    {
      char *tgt = (char *) &ttask->kinds[mapnum];
      size_t i;
      uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
      if (al)
        tgt += tgt_align - al;
      tgt_size = 0;
      for (i = 0; i < mapnum; i++)
        if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
          {
            size_t align = (size_t) 1 << (kinds[i] >> 8);
            tgt_size = (tgt_size + align - 1) & ~(align - 1);
            memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
            ttask->hostaddrs[i] = tgt + tgt_size;
            tgt_size = tgt_size + sizes[i];
          }
    }
  ttask->flags = flags;
  ttask->state = state;
  ttask->task = task;
  ttask->team = team;
  task->fn = NULL;
  task->fn_data = ttask;
  task->final_task = 0;
  gomp_mutex_lock (&team->task_lock);
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier)
                        || (taskgroup && taskgroup->cancelled), 0))
    {
      gomp_mutex_unlock (&team->task_lock);
      gomp_finish_task (task);
      free (task);
      return true;
    }
  if (depend_size)
    {
      gomp_task_handle_depend (task, parent, depend);
      if (task->num_dependees)
        {
          if (taskgroup)
            taskgroup->num_children++;
          gomp_mutex_unlock (&team->task_lock);
          return true;
        }
    }
  if (state == GOMP_TARGET_TASK_DATA)
    {
      gomp_task_run_post_handle_depend_hash (task);
      gomp_mutex_unlock (&team->task_lock);
      gomp_finish_task (task);
      free (task);
      return false;
    }
  if (taskgroup)
    taskgroup->num_children++;
  /* For async offloading, if we don't need to wait for dependencies,
     run the gomp_target_task_fn right away, essentially schedule the
     mapping part of the task in the current thread.  */
  if (devicep != NULL
      && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
                             PRIORITY_INSERT_END,
                             /*adjust_parent_depends_on=*/false,
                             task->parent_depends_on);
      if (taskgroup)
        priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
                               task, 0, PRIORITY_INSERT_END,
                               /*adjust_parent_depends_on=*/false,
                               task->parent_depends_on);
      task->pnode[PQ_TEAM].next = NULL;
      task->pnode[PQ_TEAM].prev = NULL;
      task->kind = GOMP_TASK_TIED;
      ++team->task_count;
      gomp_mutex_unlock (&team->task_lock);

      thr->task = task;
      gomp_target_task_fn (task->fn_data);
      thr->task = parent;

      gomp_mutex_lock (&team->task_lock);
      task->kind = GOMP_TASK_ASYNC_RUNNING;
      /* If GOMP_PLUGIN_target_task_completion has run already
         in between gomp_target_task_fn and the mutex lock,
         perform the requeuing here.  */
      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
        gomp_target_task_completion (team, task);
      else
        ttask->state = GOMP_TARGET_TASK_RUNNING;
      gomp_mutex_unlock (&team->task_lock);
      return true;
    }
  priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
                         PRIORITY_INSERT_BEGIN,
                         /*adjust_parent_depends_on=*/false,
                         task->parent_depends_on);
  if (taskgroup)
    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
                           PRIORITY_INSERT_BEGIN,
                           /*adjust_parent_depends_on=*/false,
                           task->parent_depends_on);
  priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
                         PRIORITY_INSERT_END,
                         /*adjust_parent_depends_on=*/false,
                         task->parent_depends_on);
  ++team->task_count;
  ++team->task_queued_count;
  gomp_team_barrier_set_task_pending (&team->barrier);
  do_wake = team->task_running_count + !parent->in_tied_task
            < team->nthreads;
  gomp_mutex_unlock (&team->task_lock);
  if (do_wake)
    gomp_team_barrier_wake (&team->barrier, 1);
  return true;
}
/* Given a parent_depends_on task in LIST, move it to the front of its
   priority so it is run as soon as possible.

   Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.

   We rearrange the queue such that all parent_depends_on tasks are
   first, and last_parent_depends_on points to the last such task we
   rearranged.  For example, given the following tasks in a queue
   where PD[123] are the parent_depends_on tasks:

       task->children
       |
       V
       C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4

   We rearrange such that:

       task->children
       |              +--- last_parent_depends_on
       |              |
       V              V
       PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4.  */
static void inline
priority_list_upgrade_task (struct priority_list *list,
                            struct priority_node *node)
{
  struct priority_node *last_parent_depends_on
    = list->last_parent_depends_on;
  if (last_parent_depends_on)
    {
      node->prev->next = node->next;
      node->next->prev = node->prev;
      node->prev = last_parent_depends_on;
      node->next = last_parent_depends_on->next;
      node->prev->next = node;
      node->next->prev = node;
    }
  else if (node != list->tasks)
    {
      node->prev->next = node->next;
      node->next->prev = node->prev;
      node->prev = list->tasks->prev;
      node->next = list->tasks;
      list->tasks = node;
      node->prev->next = node;
      node->next->prev = node;
    }
  list->last_parent_depends_on = node;
}

/* Given a parent_depends_on TASK in its parent's children_queue, move
   it to the front of its priority so it is run as soon as possible.

   PARENT is passed as an optimization.

   (This function could be defined in priority_queue.c, but we want it
   inlined, and putting it in priority_queue.h is not an option, given
   that gomp_task has not been properly defined at that point).  */

static void inline
priority_queue_upgrade_task (struct gomp_task *task,
                             struct gomp_task *parent)
{
  struct priority_queue *head = &parent->children_queue;
  struct priority_node *node = &task->pnode[PQ_CHILDREN];
#if _LIBGOMP_CHECKING_
  if (!task->parent_depends_on)
    gomp_fatal ("priority_queue_upgrade_task: task must be a "
                "parent_depends_on task");
  if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
    gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
#endif
  if (priority_queue_multi_p (head))
    {
      struct priority_list *list
        = priority_queue_lookup_priority (head, task->priority);
      priority_list_upgrade_task (list, node);
    }
  else
    priority_list_upgrade_task (&head->l, node);
}
/* Given a CHILD_TASK in LIST that is about to be executed, move it out of
   the way in LIST so that other tasks can be considered for
   execution.  LIST contains tasks of type TYPE.

   Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
   if applicable.  */

static void inline
priority_list_downgrade_task (enum priority_queue_type type,
                              struct priority_list *list,
                              struct gomp_task *child_task)
{
  struct priority_node *node = task_to_priority_node (type, child_task);
  if (list->tasks == node)
    list->tasks = node->next;
  else if (node->next != list->tasks)
    {
      /* The task in NODE is about to become TIED and TIED tasks
         cannot come before WAITING tasks.  If we're about to
         leave the queue in such an indeterminate state, rewire
         things appropriately.  However, a TIED task at the end is
         perfectly fine.  */
      struct gomp_task *next_task = priority_node_to_task (type, node->next);
      if (next_task->kind == GOMP_TASK_WAITING)
        {
          /* Remove from list.  */
          node->prev->next = node->next;
          node->next->prev = node->prev;
          /* Rewire at the end.  */
          node->next = list->tasks;
          node->prev = list->tasks->prev;
          list->tasks->prev->next = node;
          list->tasks->prev = node;
        }
    }

  /* If the current task is the last_parent_depends_on for its
     priority, adjust last_parent_depends_on appropriately.  */
  if (__builtin_expect (child_task->parent_depends_on, 0)
      && list->last_parent_depends_on == node)
    {
      struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
      if (node->prev != node
          && prev_child->kind == GOMP_TASK_WAITING
          && prev_child->parent_depends_on)
        list->last_parent_depends_on = node->prev;
      else
        {
          /* There are no more parent_depends_on entries waiting
             to run, clear the list.  */
          list->last_parent_depends_on = NULL;
        }
    }
}

/* Given a TASK in HEAD that is about to be executed, move it out of
   the way so that other tasks can be considered for execution.  HEAD
   contains tasks of type TYPE.

   Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
   if applicable.

   (This function could be defined in priority_queue.c, but we want it
   inlined, and putting it in priority_queue.h is not an option, given
   that gomp_task has not been properly defined at that point).  */

static void inline
priority_queue_downgrade_task (enum priority_queue_type type,
                               struct priority_queue *head,
                               struct gomp_task *task)
{
#if _LIBGOMP_CHECKING_
  if (!priority_queue_task_in_queue_p (type, head, task))
    gomp_fatal ("Attempt to downgrade missing task %p", task);
#endif
  if (priority_queue_multi_p (head))
    {
      struct priority_list *list
        = priority_queue_lookup_priority (head, task->priority);
      priority_list_downgrade_task (type, list, task);
    }
  else
    priority_list_downgrade_task (type, &head->l, task);
}
/* Setup CHILD_TASK to execute.  This is done by setting the task to
   TIED, and updating all relevant queues so that CHILD_TASK is no
   longer chosen for scheduling.  Also, remove CHILD_TASK from the
   overall team task queue entirely.

   Return TRUE if task or its containing taskgroup has been
   cancelled.  */

static inline bool
gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
                   struct gomp_team *team)
{
#if _LIBGOMP_CHECKING_
  if (child_task->parent)
    priority_queue_verify (PQ_CHILDREN,
                           &child_task->parent->children_queue, true);
  if (child_task->taskgroup)
    priority_queue_verify (PQ_TASKGROUP,
                           &child_task->taskgroup->taskgroup_queue, false);
  priority_queue_verify (PQ_TEAM, &team->task_queue, false);
#endif

  /* Task is about to go tied, move it out of the way.  */
  if (parent)
    priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
                                   child_task);

  /* Task is about to go tied, move it out of the way.  */
  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
  if (taskgroup)
    priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
                                   child_task);

  priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
                         MEMMODEL_RELAXED);
  child_task->pnode[PQ_TEAM].next = NULL;
  child_task->pnode[PQ_TEAM].prev = NULL;
  child_task->kind = GOMP_TASK_TIED;

  if (--team->task_queued_count == 0)
    gomp_team_barrier_clear_task_pending (&team->barrier);
  if ((gomp_team_barrier_cancelled (&team->barrier)
       || (taskgroup && taskgroup->cancelled))
      && !child_task->copy_ctors_done)
    return true;
  return false;
}
static void
gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
{
  struct gomp_task *parent = child_task->parent;
  size_t i;

  for (i = 0; i < child_task->depend_count; i++)
    if (!child_task->depend[i].redundant)
      {
        if (child_task->depend[i].next)
          child_task->depend[i].next->prev = child_task->depend[i].prev;
        if (child_task->depend[i].prev)
          child_task->depend[i].prev->next = child_task->depend[i].next;
        else
          {
            hash_entry_type *slot
              = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
                                NO_INSERT);
            if (*slot != &child_task->depend[i])
              abort ();
            if (child_task->depend[i].next)
              *slot = child_task->depend[i].next;
            else
              htab_clear_slot (parent->depend_hash, slot);
          }
      }
}
/* After a CHILD_TASK has been run, adjust the dependency queue for
   each task that depends on CHILD_TASK, to record the fact that there
   is one less dependency to worry about.  If a task that depended on
   CHILD_TASK now has no dependencies, place it in the various queues
   so it gets scheduled to run.

   TEAM is the team to which CHILD_TASK belongs.  */

static size_t
gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
                                     struct gomp_team *team)
{
  struct gomp_task *parent = child_task->parent;
  size_t i, count = child_task->dependers->n_elem, ret = 0;
  for (i = 0; i < count; i++)
    {
      struct gomp_task *task = child_task->dependers->elem[i];

      /* CHILD_TASK satisfies a dependency for TASK.  Keep track of
         TASK's remaining dependencies.  Once TASK has no other
         dependencies, put it into the various queues so it will get
         scheduled for execution.  */
      if (--task->num_dependees != 0)
        continue;

      struct gomp_taskgroup *taskgroup = task->taskgroup;
      if (parent)
        {
          priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
                                 task, task->priority,
                                 PRIORITY_INSERT_BEGIN,
                                 /*adjust_parent_depends_on=*/true,
                                 task->parent_depends_on);
          if (parent->taskwait)
            {
              if (parent->taskwait->in_taskwait)
                {
                  /* One more task has had its dependencies met.
                     Inform any waiters.  */
                  parent->taskwait->in_taskwait = false;
                  gomp_sem_post (&parent->taskwait->taskwait_sem);
                }
              else if (parent->taskwait->in_depend_wait)
                {
                  /* One more task has had its dependencies met.
                     Inform any waiters.  */
                  parent->taskwait->in_depend_wait = false;
                  gomp_sem_post (&parent->taskwait->taskwait_sem);
                }
            }
        }
      if (taskgroup)
        {
          priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
                                 task, task->priority,
                                 PRIORITY_INSERT_BEGIN,
                                 /*adjust_parent_depends_on=*/false,
                                 task->parent_depends_on);
          if (taskgroup->in_taskgroup_wait)
            {
              /* One more task has had its dependencies met.
                 Inform any waiters.  */
              taskgroup->in_taskgroup_wait = false;
              gomp_sem_post (&taskgroup->taskgroup_sem);
            }
        }
      priority_queue_insert (PQ_TEAM, &team->task_queue,
                             task, task->priority,
                             PRIORITY_INSERT_END,
                             /*adjust_parent_depends_on=*/false,
                             task->parent_depends_on);
      ++team->task_count;
      ++team->task_queued_count;
      ++ret;
    }
  free (child_task->dependers);
  child_task->dependers = NULL;
  if (ret > 1)
    gomp_team_barrier_set_task_pending (&team->barrier);
  return ret;
}
static inline size_t
gomp_task_run_post_handle_depend (struct gomp_task *child_task,
                                  struct gomp_team *team)
{
  if (child_task->depend_count == 0)
    return 0;

  /* If parent is gone already, the hash table is freed and nothing
     will use the hash table anymore, no need to remove anything from it.  */
  if (child_task->parent != NULL)
    gomp_task_run_post_handle_depend_hash (child_task);

  if (child_task->dependers == NULL)
    return 0;

  return gomp_task_run_post_handle_dependers (child_task, team);
}

/* Remove CHILD_TASK from its parent.  */

static inline void
gomp_task_run_post_remove_parent (struct gomp_task *child_task)
{
  struct gomp_task *parent = child_task->parent;
  if (parent == NULL)
    return;

  /* If this was the last task the parent was depending on,
     synchronize with gomp_task_maybe_wait_for_dependencies so it can
     clean up and return.  */
  if (__builtin_expect (child_task->parent_depends_on, 0)
      && --parent->taskwait->n_depend == 0
      && parent->taskwait->in_depend_wait)
    {
      parent->taskwait->in_depend_wait = false;
      gomp_sem_post (&parent->taskwait->taskwait_sem);
    }

  if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
                             child_task, MEMMODEL_RELEASE)
      && parent->taskwait && parent->taskwait->in_taskwait)
    {
      parent->taskwait->in_taskwait = false;
      gomp_sem_post (&parent->taskwait->taskwait_sem);
    }
  child_task->pnode[PQ_CHILDREN].next = NULL;
  child_task->pnode[PQ_CHILDREN].prev = NULL;
}

/* Remove CHILD_TASK from its taskgroup.  */

static inline void
gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
{
  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
  if (taskgroup == NULL)
    return;
  bool empty = priority_queue_remove (PQ_TASKGROUP,
                                      &taskgroup->taskgroup_queue,
                                      child_task, MEMMODEL_RELAXED);
  child_task->pnode[PQ_TASKGROUP].next = NULL;
  child_task->pnode[PQ_TASKGROUP].prev = NULL;
  if (taskgroup->num_children > 1)
    --taskgroup->num_children;
  else
    {
      /* We access taskgroup->num_children in GOMP_taskgroup_end
         outside of the task lock mutex region, so
         need a release barrier here to ensure memory
         written by child_task->fn above is flushed
         before the NULL is written.  */
      __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
    }
  if (empty && taskgroup->in_taskgroup_wait)
    {
      taskgroup->in_taskgroup_wait = false;
      gomp_sem_post (&taskgroup->taskgroup_sem);
    }
}
void
gomp_barrier_handle_tasks (gomp_barrier_state_t state)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  gomp_mutex_lock (&team->task_lock);
  if (gomp_barrier_last_thread (state))
    {
      if (team->task_count == 0)
        {
          gomp_team_barrier_done (&team->barrier, state);
          gomp_mutex_unlock (&team->task_lock);
          gomp_team_barrier_wake (&team->barrier, 0);
          return;
        }
      gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
    }

  while (1)
    {
      bool cancelled = false;
      if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
        {
          bool ignored;
          child_task
            = priority_queue_next_task (PQ_TEAM, &team->task_queue,
                                        PQ_IGNORED, NULL,
                                        &ignored);
          cancelled = gomp_task_run_pre (child_task, child_task->parent,
                                         team);
          if (__builtin_expect (cancelled, 0))
            {
              if (to_free)
                {
                  gomp_finish_task (to_free);
                  free (to_free);
                  to_free = NULL;
                }
              goto finish_cancelled;
            }
          team->task_running_count++;
          child_task->in_tied_task = true;
        }
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
        {
          gomp_team_barrier_wake (&team->barrier, do_wake);
          do_wake = 0;
        }
      if (to_free)
        {
          gomp_finish_task (to_free);
          free (to_free);
          to_free = NULL;
        }
      if (child_task)
        {
          thr->task = child_task;
          if (__builtin_expect (child_task->fn == NULL, 0))
            {
              if (gomp_target_task_fn (child_task->fn_data))
                {
                  thr->task = task;
                  gomp_mutex_lock (&team->task_lock);
                  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
                  team->task_running_count--;
                  struct gomp_target_task *ttask
                    = (struct gomp_target_task *) child_task->fn_data;
                  /* If GOMP_PLUGIN_target_task_completion has run already
                     in between gomp_target_task_fn and the mutex lock,
                     perform the requeuing here.  */
                  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
                    gomp_target_task_completion (team, child_task);
                  else
                    ttask->state = GOMP_TARGET_TASK_RUNNING;
                  child_task = NULL;
                  continue;
                }
            }
          else
            child_task->fn (child_task->fn_data);
          thr->task = task;
        }
      else
        return;
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
        {
         finish_cancelled:;
          size_t new_tasks
            = gomp_task_run_post_handle_depend (child_task, team);
          gomp_task_run_post_remove_parent (child_task);
          gomp_clear_parent (&child_task->children_queue);
          gomp_task_run_post_remove_taskgroup (child_task);
          to_free = child_task;
          child_task = NULL;
          if (!cancelled)
            team->task_running_count--;
          if (new_tasks > 1)
            {
              do_wake = team->nthreads - team->task_running_count;
              if (do_wake > new_tasks)
                do_wake = new_tasks;
            }
          if (--team->task_count == 0
              && gomp_team_barrier_waiting_for_tasks (&team->barrier))
            {
              gomp_team_barrier_done (&team->barrier, state);
              gomp_mutex_unlock (&team->task_lock);
              gomp_team_barrier_wake (&team->barrier, 0);
              gomp_mutex_lock (&team->task_lock);
            }
        }
    }
}
/* Called when encountering a taskwait directive.

   Wait for all children of the current task.  */

void
GOMP_taskwait (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  struct gomp_taskwait taskwait;
  int do_wake = 0;

  /* The acquire barrier on load of task->children here synchronizes
     with the write of a NULL in gomp_task_run_post_remove_parent.  It is
     not necessary that we synchronize with other non-NULL writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskwait.  */
  if (task == NULL
      || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
    return;

  memset (&taskwait, 0, sizeof (taskwait));
  bool child_q = false;
  gomp_mutex_lock (&team->task_lock);
  while (1)
    {
      bool cancelled = false;
      if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
        {
          bool destroy_taskwait = task->taskwait != NULL;
          task->taskwait = NULL;
          gomp_mutex_unlock (&team->task_lock);
          if (to_free)
            {
              gomp_finish_task (to_free);
              free (to_free);
            }
          if (destroy_taskwait)
            gomp_sem_destroy (&taskwait.taskwait_sem);
          return;
        }
      struct gomp_task *next_task
        = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
                                    PQ_TEAM, &team->task_queue, &child_q);
      if (next_task->kind == GOMP_TASK_WAITING)
        {
          child_task = next_task;
          cancelled
            = gomp_task_run_pre (child_task, task, team);
          if (__builtin_expect (cancelled, 0))
            {
              if (to_free)
                {
                  gomp_finish_task (to_free);
                  free (to_free);
                  to_free = NULL;
                }
              goto finish_cancelled;
            }
        }
      else
        {
          /* All tasks we are waiting for are either running in other
             threads, or they are tasks that have not had their
             dependencies met (so they're not even in the queue).  Wait
             for them.  */
          if (task->taskwait == NULL)
            {
              taskwait.in_depend_wait = false;
              gomp_sem_init (&taskwait.taskwait_sem, 0);
              task->taskwait = &taskwait;
            }
          taskwait.in_taskwait = true;
        }
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
        {
          gomp_team_barrier_wake (&team->barrier, do_wake);
          do_wake = 0;
        }
      if (to_free)
        {
          gomp_finish_task (to_free);
          free (to_free);
          to_free = NULL;
        }
      if (child_task)
        {
          thr->task = child_task;
          if (__builtin_expect (child_task->fn == NULL, 0))
            {
              if (gomp_target_task_fn (child_task->fn_data))
                {
                  thr->task = task;
                  gomp_mutex_lock (&team->task_lock);
                  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
                  struct gomp_target_task *ttask
                    = (struct gomp_target_task *) child_task->fn_data;
                  /* If GOMP_PLUGIN_target_task_completion has run already
                     in between gomp_target_task_fn and the mutex lock,
                     perform the requeuing here.  */
                  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
                    gomp_target_task_completion (team, child_task);
                  else
                    ttask->state = GOMP_TARGET_TASK_RUNNING;
                  child_task = NULL;
                  continue;
                }
            }
          else
            child_task->fn (child_task->fn_data);
          thr->task = task;
        }
      else
        gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
        {
         finish_cancelled:;
          size_t new_tasks
            = gomp_task_run_post_handle_depend (child_task, team);

          if (child_q)
            {
              priority_queue_remove (PQ_CHILDREN, &task->children_queue,
                                     child_task, MEMMODEL_RELAXED);
              child_task->pnode[PQ_CHILDREN].next = NULL;
              child_task->pnode[PQ_CHILDREN].prev = NULL;
            }

          gomp_clear_parent (&child_task->children_queue);

          gomp_task_run_post_remove_taskgroup (child_task);

          to_free = child_task;
          child_task = NULL;
          team->task_count--;
          if (new_tasks > 1)
            {
              do_wake = team->nthreads - team->task_running_count
                        - !task->in_tied_task;
              if (do_wake > new_tasks)
                do_wake = new_tasks;
            }
        }
    }
}
/* An undeferred task is about to run.  Wait for all tasks that this
   undeferred task depends on.

   This is done by first putting all known ready dependencies
   (dependencies that have their own dependencies met) at the top of
   the scheduling queues.  Then we iterate through these imminently
   ready tasks (and possibly other high priority tasks), and run them.
   If we run out of ready dependencies to execute, we either wait for
   the remaining dependencies to finish, or wait for them to get
   scheduled so we can run them.

   DEPEND is as in GOMP_task.  */

void
gomp_task_maybe_wait_for_dependencies (void **depend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;
  struct gomp_team *team = thr->ts.team;
  struct gomp_task_depend_entry elem, *ent = NULL;
  struct gomp_taskwait taskwait;
  size_t ndepend = (uintptr_t) depend[0];
  size_t nout = (uintptr_t) depend[1];
  size_t i;
  size_t num_awaited = 0;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  gomp_mutex_lock (&team->task_lock);
  for (i = 0; i < ndepend; i++)
    {
      elem.addr = depend[i + 2];
      ent = htab_find (task->depend_hash, &elem);
      for (; ent; ent = ent->next)
        if (i >= nout && ent->is_in)
          continue;
        else
          {
            struct gomp_task *tsk = ent->task;
            if (!tsk->parent_depends_on)
              {
                tsk->parent_depends_on = true;
                ++num_awaited;
                /* If dependency TSK itself has no dependencies and is
                   ready to run, move it up front so that we run it as
                   soon as possible.  */
                if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
                  priority_queue_upgrade_task (tsk, task);
              }
          }
    }
  if (num_awaited == 0)
    {
      gomp_mutex_unlock (&team->task_lock);
      return;
    }

  memset (&taskwait, 0, sizeof (taskwait));
  taskwait.n_depend = num_awaited;
  gomp_sem_init (&taskwait.taskwait_sem, 0);
  task->taskwait = &taskwait;

  while (1)
    {
      bool cancelled = false;
      if (taskwait.n_depend == 0)
        {
          task->taskwait = NULL;
          gomp_mutex_unlock (&team->task_lock);
          if (to_free)
            {
              gomp_finish_task (to_free);
              free (to_free);
            }
          gomp_sem_destroy (&taskwait.taskwait_sem);
          return;
        }

      /* Theoretically when we have multiple priorities, we should
         choose between the highest priority item in
         task->children_queue and team->task_queue here, so we should
         use priority_queue_next_task().  However, since we are
         running an undeferred task, perhaps that makes all tasks it
         depends on undeferred, thus a priority of INF?  This would
         make it unnecessary to take anything into account here,
         but the dependencies.

         On the other hand, if we want to use priority_queue_next_task(),
         care should be taken to only use priority_queue_remove()
         below if the task was actually removed from the children
         queue.  */
      bool ignored;
      struct gomp_task *next_task
        = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
                                    PQ_IGNORED, NULL, &ignored);

      if (next_task->kind == GOMP_TASK_WAITING)
        {
          child_task = next_task;
          cancelled
            = gomp_task_run_pre (child_task, task, team);
          if (__builtin_expect (cancelled, 0))
            {
              if (to_free)
                {
                  gomp_finish_task (to_free);
                  free (to_free);
                  to_free = NULL;
                }
              goto finish_cancelled;
            }
        }
      else
        /* All tasks we are waiting for are either running in other
           threads, or they are tasks that have not had their
           dependencies met (so they're not even in the queue).  Wait
           for them.  */
        taskwait.in_depend_wait = true;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
        {
          gomp_team_barrier_wake (&team->barrier, do_wake);
          do_wake = 0;
        }
      if (to_free)
        {
          gomp_finish_task (to_free);
          free (to_free);
          to_free = NULL;
        }
      if (child_task)
        {
          thr->task = child_task;
          if (__builtin_expect (child_task->fn == NULL, 0))
            {
              if (gomp_target_task_fn (child_task->fn_data))
                {
                  thr->task = task;
                  gomp_mutex_lock (&team->task_lock);
                  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
                  struct gomp_target_task *ttask
                    = (struct gomp_target_task *) child_task->fn_data;
                  /* If GOMP_PLUGIN_target_task_completion has run already
                     in between gomp_target_task_fn and the mutex lock,
                     perform the requeuing here.  */
                  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
                    gomp_target_task_completion (team, child_task);
                  else
                    ttask->state = GOMP_TARGET_TASK_RUNNING;
                  child_task = NULL;
                  continue;
                }
            }
          else
            child_task->fn (child_task->fn_data);
          thr->task = task;
        }
      else
        gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
        {
         finish_cancelled:;
          size_t new_tasks
            = gomp_task_run_post_handle_depend (child_task, team);
          if (child_task->parent_depends_on)
            --taskwait.n_depend;

          priority_queue_remove (PQ_CHILDREN, &task->children_queue,
                                 child_task, MEMMODEL_RELAXED);
          child_task->pnode[PQ_CHILDREN].next = NULL;
          child_task->pnode[PQ_CHILDREN].prev = NULL;

          gomp_clear_parent (&child_task->children_queue);
          gomp_task_run_post_remove_taskgroup (child_task);
          to_free = child_task;
          child_task = NULL;
          team->task_count--;
          if (new_tasks > 1)
            {
              do_wake = team->nthreads - team->task_running_count
                        - !task->in_tied_task;
              if (do_wake > new_tasks)
                do_wake = new_tasks;
            }
        }
    }
}
/* Called when encountering a taskyield directive.  */

void
GOMP_taskyield (void)
{
  /* Nothing at the moment.  */
}

void
GOMP_taskgroup_start (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_taskgroup *taskgroup;

  /* If team is NULL, all tasks are executed as
     GOMP_TASK_UNDEFERRED tasks and thus all children tasks of
     taskgroup and their descendant tasks will be finished
     by the time GOMP_taskgroup_end is called.  */
  if (team == NULL)
    return;
  taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup));
  taskgroup->prev = task->taskgroup;
  priority_queue_init (&taskgroup->taskgroup_queue);
  taskgroup->in_taskgroup_wait = false;
  taskgroup->cancelled = false;
  taskgroup->num_children = 0;
  gomp_sem_init (&taskgroup->taskgroup_sem, 0);
  task->taskgroup = taskgroup;
}
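
/* As a usage sketch (hypothetical lowering, not taken from these
   sources): user code such as

       #pragma omp taskgroup
       {
         #pragma omp task
         f ();
       }

   becomes a call to GOMP_taskgroup_start (), a GOMP_task call for the
   child task, and a closing GOMP_taskgroup_end (), which waits for the
   child and all of its descendant tasks.  */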
void
GOMP_taskgroup_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_taskgroup *taskgroup;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  if (team == NULL)
    return;
  taskgroup = task->taskgroup;
  if (__builtin_expect (taskgroup == NULL, 0)
      && thr->ts.level == 0)
    {
      /* This can happen if GOMP_taskgroup_start is called when
         thr->ts.team == NULL, but inside of the taskgroup there
         is #pragma omp target nowait that creates an implicit
         team with a single thread.  In this case, we want to wait
         for all outstanding tasks in this team.  */
      gomp_team_barrier_wait (&team->barrier);
      return;
    }

  /* The acquire barrier on load of taskgroup->num_children here
     synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
     It is not necessary that we synchronize with other non-0 writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskgroup_end.  */
  if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
    goto finish;

  bool unused;
  gomp_mutex_lock (&team->task_lock);
  while (1)
    {
      bool cancelled = false;
      if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
                                  MEMMODEL_RELAXED))
        {
          if (taskgroup->num_children)
            {
              if (priority_queue_empty_p (&task->children_queue,
                                          MEMMODEL_RELAXED))
                goto do_wait;
              child_task
                = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
                                            PQ_TEAM, &team->task_queue,
                                            &unused);
            }
          else
            {
              gomp_mutex_unlock (&team->task_lock);
              if (to_free)
                {
                  gomp_finish_task (to_free);
                  free (to_free);
                }
              goto finish;
            }
        }
      else
        child_task
          = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
                                      PQ_TEAM, &team->task_queue, &unused);
      if (child_task->kind == GOMP_TASK_WAITING)
        {
          cancelled
            = gomp_task_run_pre (child_task, child_task->parent, team);
          if (__builtin_expect (cancelled, 0))
            {
              if (to_free)
                {
                  gomp_finish_task (to_free);
                  free (to_free);
                  to_free = NULL;
                }
              goto finish_cancelled;
            }
        }
      else
        {
          child_task = NULL;
         do_wait:
          /* All tasks we are waiting for are either running in other
             threads, or they are tasks that have not had their
             dependencies met (so they're not even in the queue).  Wait
             for them.  */
          taskgroup->in_taskgroup_wait = true;
        }
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
        {
          gomp_team_barrier_wake (&team->barrier, do_wake);
          do_wake = 0;
        }
      if (to_free)
        {
          gomp_finish_task (to_free);
          free (to_free);
          to_free = NULL;
        }
      if (child_task)
        {
          thr->task = child_task;
          if (__builtin_expect (child_task->fn == NULL, 0))
            {
              if (gomp_target_task_fn (child_task->fn_data))
                {
                  thr->task = task;
                  gomp_mutex_lock (&team->task_lock);
                  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
                  struct gomp_target_task *ttask
                    = (struct gomp_target_task *) child_task->fn_data;
                  /* If GOMP_PLUGIN_target_task_completion has run already
                     in between gomp_target_task_fn and the mutex lock,
                     perform the requeuing here.  */
                  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
                    gomp_target_task_completion (team, child_task);
                  else
                    ttask->state = GOMP_TARGET_TASK_RUNNING;
                  child_task = NULL;
                  continue;
                }
            }
          else
            child_task->fn (child_task->fn_data);
          thr->task = task;
        }
      else
        gomp_sem_wait (&taskgroup->taskgroup_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
        {
         finish_cancelled:;
          size_t new_tasks
            = gomp_task_run_post_handle_depend (child_task, team);
          gomp_task_run_post_remove_parent (child_task);
          gomp_clear_parent (&child_task->children_queue);
          gomp_task_run_post_remove_taskgroup (child_task);
          to_free = child_task;
          child_task = NULL;
          team->task_count--;
          if (new_tasks > 1)
            {
              do_wake = team->nthreads - team->task_running_count
                        - !task->in_tied_task;
              if (do_wake > new_tasks)
                do_wake = new_tasks;
            }
        }
    }

 finish:
  task->taskgroup = taskgroup->prev;
  gomp_sem_destroy (&taskgroup->taskgroup_sem);
  free (taskgroup);
}
int
omp_in_final (void)
{
  struct gomp_thread *thr = gomp_thread ();
  return thr->task && thr->task->final_task;
}

ialias (omp_in_final)