/* Copyright (C) 2007-2016 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file handles the maintenance of tasks in response to task
   creation and termination.  */

#include "libgomp.h"
#include <stdlib.h>
#include <string.h>
#include "gomp-constants.h"
typedef struct gomp_task_depend_entry *hash_entry_type;

static inline void *
htab_alloc (size_t size)
{
  return gomp_malloc (size);
}

static inline void
htab_free (void *ptr)
{
  free (ptr);
}

#include "hashtab.h"

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer (element->addr);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x->addr == y->addr;
}
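
/* Note: dependency entries hash and compare on the depended-on address
   alone, so all records for one address share a single hash slot and are
   chained through their next/prev fields (see gomp_task_handle_depend
   below).  */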
/* Create a new task data structure.  */

void
gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
		struct gomp_task_icv *prev_icv)
{
  /* It would seem that using memset here would be a win, but it turns
     out that partially filling gomp_task allows us to keep the
     overhead of task creation low.  In the nqueens-1.c test, for a
     sufficiently large N, we drop the overhead from 5-6% to 1%.

     Note, the nqueens-1.c test in serial mode is a good test to
     benchmark the overhead of creating tasks as there are millions of
     tiny tasks created that all run undeferred.  */
  task->parent = parent_task;
  task->icv = *prev_icv;
  task->kind = GOMP_TASK_IMPLICIT;
  task->taskwait = NULL;
  task->in_tied_task = false;
  task->final_task = false;
  task->copy_ctors_done = false;
  task->parent_depends_on = false;
  priority_queue_init (&task->children_queue);
  task->taskgroup = NULL;
  task->dependers = NULL;
  task->depend_hash = NULL;
  task->depend_count = 0;
}
/* Clean up a task, after completing it.  */

void
gomp_end_task (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;

  gomp_finish_task (task);
  thr->task = task->parent;
}
/* Clear the parent field of every task in LIST.  */

static inline void
gomp_clear_parent_in_list (struct priority_list *list)
{
  struct priority_node *p = list->tasks;
  if (p)
    do
      {
	priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
	p = p->next;
      }
    while (p != list->tasks);
}
/* Splay tree version of gomp_clear_parent_in_list.

   Clear the parent field of every task in NODE within SP, and free
   the node when done.  */

static void
gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
{
  if (!node)
    return;
  prio_splay_tree_node left = node->left, right = node->right;
  gomp_clear_parent_in_list (&node->key.l);
#if _LIBGOMP_CHECKING_
  memset (node, 0xaf, sizeof (*node));
#endif
  /* No need to remove the node from the tree.  We're nuking
     everything, so just free the nodes and our caller can clear the
     entire splay tree.  */
  free (node);
  gomp_clear_parent_in_tree (sp, left);
  gomp_clear_parent_in_tree (sp, right);
}
/* Clear the parent field of every task in Q and remove every task
   from Q.  */

static inline void
gomp_clear_parent (struct priority_queue *q)
{
  if (priority_queue_multi_p (q))
    {
      gomp_clear_parent_in_tree (&q->t, q->t.root);
      /* All the nodes have been cleared in gomp_clear_parent_in_tree.
	 No need to remove anything.  We can just nuke everything.  */
      q->t.root = NULL;
    }
  else
    gomp_clear_parent_in_list (&q->l);
}
/* Helper function for GOMP_task and gomp_create_target_task.

   For a TASK with in/out dependencies, fill in the various dependency
   queues.  PARENT is the parent of said task.  DEPEND is as in
   GOMP_task.  */

static void
gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
			 void **depend)
{
  size_t ndepend = (uintptr_t) depend[0];
  size_t nout = (uintptr_t) depend[1];
  size_t i;
  hash_entry_type ent;

  task->depend_count = ndepend;
  task->num_dependees = 0;
  if (parent->depend_hash == NULL)
    parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
  for (i = 0; i < ndepend; i++)
    {
      task->depend[i].addr = depend[2 + i];
      task->depend[i].next = NULL;
      task->depend[i].prev = NULL;
      task->depend[i].task = task;
      task->depend[i].is_in = i >= nout;
      task->depend[i].redundant = false;
      task->depend[i].redundant_out = false;

      hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
					      &task->depend[i], INSERT);
      hash_entry_type out = NULL, last = NULL;
      if (*slot)
	{
	  /* If multiple depend clauses in the same task reference the
	     same address, all but the first one are redundant.  As
	     inout/out come first, if any of them is inout/out, it will
	     win, which is the right semantics.  */
	  if ((*slot)->task == task)
	    {
	      task->depend[i].redundant = true;
	      continue;
	    }
	  for (ent = *slot; ent; ent = ent->next)
	    {
	      if (ent->redundant_out)
		break;

	      last = ent;

	      /* depend(in:...) doesn't depend on earlier depend(in:...).  */
	      if (i >= nout && ent->is_in)
		continue;

	      if (!ent->is_in)
		out = ent;

	      struct gomp_task *tsk = ent->task;
	      if (tsk->dependers == NULL)
		{
		  tsk->dependers
		    = gomp_malloc (sizeof (struct gomp_dependers_vec)
				   + 6 * sizeof (struct gomp_task *));
		  tsk->dependers->n_elem = 1;
		  tsk->dependers->allocated = 6;
		  tsk->dependers->elem[0] = task;
		  task->num_dependees++;
		  continue;
		}
	      /* We already have some other dependency on tsk from an
		 earlier depend clause.  */
	      else if (tsk->dependers->n_elem
		       && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
			   == task))
		continue;
	      else if (tsk->dependers->n_elem == tsk->dependers->allocated)
		{
		  tsk->dependers->allocated
		    = tsk->dependers->allocated * 2 + 2;
		  tsk->dependers
		    = gomp_realloc (tsk->dependers,
				    sizeof (struct gomp_dependers_vec)
				    + (tsk->dependers->allocated
				       * sizeof (struct gomp_task *)));
		}
	      tsk->dependers->elem[tsk->dependers->n_elem++] = task;
	      task->num_dependees++;
	    }
	  task->depend[i].next = *slot;
	  (*slot)->prev = &task->depend[i];
	}
      *slot = &task->depend[i];

      /* There is no need to store more than one depend({,in}out:) task per
	 address in the hash table chain for the purpose of creation of
	 deferred tasks, because each out depends on all earlier outs, thus it
	 is enough to record just the last depend({,in}out:).  For depend(in:),
	 we need to keep all of the previous ones not terminated yet, because
	 a later depend({,in}out:) might need to depend on all of them.  So, if
	 the new task's clause is depend({,in}out:), we know there is at most
	 one other depend({,in}out:) clause in the list (out).  For
	 non-deferred tasks we want to see all outs, so they are moved to the
	 end of the chain; after the first redundant_out entry all following
	 entries should be redundant_out.  */
      if (!task->depend[i].is_in && out)
	{
	  if (out != last)
	    {
	      out->next->prev = out->prev;
	      out->prev->next = out->next;
	      out->next = last->next;
	      out->prev = last;
	      last->next = out;
	      if (out->next)
		out->next->prev = out;
	    }
	  out->redundant_out = true;
	}
    }
}
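
/* An illustrative sketch (not part of the runtime): if sibling tasks
   A: depend(out: x), B: depend(in: x), C: depend(in: x) and
   D: depend(inout: x) are created in that order while none of them has
   finished, then after D is handled the chain for &x holds D's entry at
   the head, followed by C's and B's, with A's entry moved to the end
   and marked redundant_out.  A, B and C each record D in their
   dependers vectors, and D->num_dependees is 3.  */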
/* Called when encountering an explicit task directive.  If IF_CLAUSE is
   false, then we must not delay in executing the task.  If UNTIED is true,
   then the task may be executed by any member of the team.

   DEPEND is an array containing:
	depend[0]: number of depend elements.
	depend[1]: number of depend elements of type "out".
	depend[2..N+1]: address of [1..N]th depend element.  */
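
/* For illustration (an assumed example, not tied to any particular
   compiler version): a task with depend(out: x) depend(in: y, z) would
   receive the equivalent of

     void *depend[5] = { (void *) 3, (void *) 1, &x, &y, &z };

   i.e. three dependencies in total, one of them of "out" kind, with the
   out/inout addresses placed before the "in" ones.  */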
void
GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
	   long arg_size, long arg_align, bool if_clause, unsigned flags,
	   void **depend, int priority)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
     might be running on a different thread than FN.  */
  if (cpyfn)
    if_clause = false;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0)
    priority = 0;
  else if (priority > gomp_max_task_priority_var)
    priority = gomp_max_task_priority_var;

  if (!if_clause || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count > 64 * team->nthreads)
    {
      struct gomp_task task;

      /* If there are depend clauses and earlier deferred sibling tasks
	 with depend clauses, check if there isn't a dependency.  If there
	 is, we need to wait for them.  There is no need to handle
	 depend clauses for non-deferred tasks other than this, because
	 the parent task is suspended until the child task finishes and thus
	 it can't start further child tasks.  */
      if ((flags & GOMP_TASK_FLAG_DEPEND)
	  && thr->task && thr->task->depend_hash)
	gomp_task_maybe_wait_for_dependencies (depend);

      gomp_init_task (&task, thr->task, gomp_icv (false));
      task.kind = GOMP_TASK_UNDEFERRED;
      task.final_task = (thr->task && thr->task->final_task)
			|| (flags & GOMP_TASK_FLAG_FINAL);
      task.priority = priority;
      if (thr->task)
	{
	  task.in_tied_task = thr->task->in_tied_task;
	  task.taskgroup = thr->task->taskgroup;
	}
      thr->task = &task;
      if (__builtin_expect (cpyfn != NULL, 0))
	{
	  char buf[arg_size + arg_align - 1];
	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
				& ~(uintptr_t) (arg_align - 1));
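	  /* ARG is BUF rounded up to the next ARG_ALIGN boundary so the
	     copy constructor sees suitably aligned storage; e.g. with
	     arg_align == 8, buf == 0x1003 yields arg == 0x1008
	     (illustrative addresses).  */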
	  cpyfn (arg, data);
	  fn (arg);
	}
      else
	fn (data);
      /* Access to "children" is normally done inside a task_lock
	 mutex region, but the only way this particular task.children
	 can be set is if this thread's task work function (fn)
	 creates children.  So since the setter is *this* thread, we
	 need no barriers here when testing for non-NULL.  We can have
	 task.children set by the current thread then changed by a
	 child thread, but seeing a stale non-NULL value is not a
	 problem.  Once past the task_lock acquisition, this thread
	 will see the real value of task.children.  */
      if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
	{
	  gomp_mutex_lock (&team->task_lock);
	  gomp_clear_parent (&task.children_queue);
	  gomp_mutex_unlock (&team->task_lock);
	}
      gomp_end_task ();
    }
  else
    {
      struct gomp_task *task;
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      bool do_wake;
      size_t depend_size = 0;

      if (flags & GOMP_TASK_FLAG_DEPEND)
	depend_size = ((uintptr_t) depend[0]
		       * sizeof (struct gomp_task_depend_entry));
      task = gomp_malloc (sizeof (*task) + depend_size
			  + arg_size + arg_align - 1);
      arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
		      & ~(uintptr_t) (arg_align - 1));
      gomp_init_task (task, parent, gomp_icv (false));
      task->priority = priority;
      task->kind = GOMP_TASK_UNDEFERRED;
      task->in_tied_task = parent->in_tied_task;
      task->taskgroup = taskgroup;
      thr->task = task;
      if (cpyfn)
	{
	  cpyfn (arg, data);
	  task->copy_ctors_done = true;
	}
      else
	memcpy (arg, data, arg_size);
      thr->task = parent;
      task->kind = GOMP_TASK_WAITING;
      task->fn = fn;
      task->fn_data = arg;
      task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
	 tasks.  */
      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
			     || (taskgroup && taskgroup->cancelled))
			    && !task->copy_ctors_done, 0))
	{
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_finish_task (task);
	  free (task);
	  return;
	}
      if (taskgroup)
	taskgroup->num_children++;
      if (depend_size)
	{
	  gomp_task_handle_depend (task, parent, depend);
	  if (task->num_dependees)
	    {
	      /* Tasks that depend on other tasks are not put into the
		 various waiting queues, so we are done for now.  Said
		 tasks are instead put into the queues via
		 gomp_task_run_post_handle_dependers() after their
		 dependencies have been satisfied.  After which, they
		 can be picked up by the various scheduling
		 points.  */
	      gomp_mutex_unlock (&team->task_lock);
	      return;
	    }
	}

      priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
			     task, priority,
			     PRIORITY_INSERT_BEGIN,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);
      if (taskgroup)
	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
			       task, priority,
			       PRIORITY_INSERT_BEGIN,
			       /*adjust_parent_depends_on=*/false,
			       task->parent_depends_on);
      priority_queue_insert (PQ_TEAM, &team->task_queue,
			     task, priority,
			     PRIORITY_INSERT_END,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);

      ++team->task_count;
      ++team->task_queued_count;
      gomp_team_barrier_set_task_pending (&team->barrier);
      do_wake = team->task_running_count + !parent->in_tied_task
		< team->nthreads;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	gomp_team_barrier_wake (&team->barrier, 1);
    }
}
ialias (GOMP_taskgroup_start)
ialias (GOMP_taskgroup_end)

#define TYPE long
#define UTYPE unsigned long
#define TYPE_is_long 1
#include "taskloop.c"
#undef TYPE
#undef UTYPE
#undef TYPE_is_long

#define TYPE unsigned long long
#define UTYPE TYPE
#define GOMP_taskloop GOMP_taskloop_ull
#include "taskloop.c"
#undef TYPE
#undef UTYPE
#undef GOMP_taskloop
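
/* The two inclusions of taskloop.c above instantiate the taskloop
   implementation twice by textual substitution: once with signed long
   iterators (defining GOMP_taskloop) and once with unsigned long long
   iterators, renaming the entry point to GOMP_taskloop_ull via the
   macro.  TYPE_is_long lets taskloop.c specialize code paths for the
   signed long variant where needed.  */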
static void inline
priority_queue_move_task_first (enum priority_queue_type type,
				struct priority_queue *head,
				struct gomp_task *task)
{
#if _LIBGOMP_CHECKING_
  if (!priority_queue_task_in_queue_p (type, head, task))
    gomp_fatal ("Attempt to move first missing task %p", task);
#endif
  struct priority_list *list;
  if (priority_queue_multi_p (head))
    {
      list = priority_queue_lookup_priority (head, task->priority);
#if _LIBGOMP_CHECKING_
      if (!list)
	gomp_fatal ("Unable to find priority %d", task->priority);
#endif
    }
  else
    list = &head->l;
  priority_list_remove (list, task_to_priority_node (type, task), 0);
  priority_list_insert (type, list, task, task->priority,
			PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
			task->parent_depends_on);
}
/* Actual body of GOMP_PLUGIN_target_task_completion that is executed
   with team->task_lock held, or is executed in the thread that called
   gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
   run before it acquires team->task_lock.  */

static void
gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
{
  struct gomp_task *parent = task->parent;
  if (parent)
    priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
				    task);

  struct gomp_taskgroup *taskgroup = task->taskgroup;
  if (taskgroup)
    priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				    task);

  priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
			 PRIORITY_INSERT_BEGIN, false,
			 task->parent_depends_on);
  task->kind = GOMP_TASK_WAITING;
  if (parent && parent->taskwait)
    {
      if (parent->taskwait->in_taskwait)
	{
	  /* One more task has had its dependencies met.
	     Inform any waiters.  */
	  parent->taskwait->in_taskwait = false;
	  gomp_sem_post (&parent->taskwait->taskwait_sem);
	}
      else if (parent->taskwait->in_depend_wait)
	{
	  /* One more task has had its dependencies met.
	     Inform any waiters.  */
	  parent->taskwait->in_depend_wait = false;
	  gomp_sem_post (&parent->taskwait->taskwait_sem);
	}
    }
  if (taskgroup && taskgroup->in_taskgroup_wait)
    {
      /* One more task has had its dependencies met.
	 Inform any waiters.  */
      taskgroup->in_taskgroup_wait = false;
      gomp_sem_post (&taskgroup->taskgroup_sem);
    }

  ++team->task_queued_count;
  gomp_team_barrier_set_task_pending (&team->barrier);
  /* I'm afraid this can't be done after releasing team->task_lock,
     as gomp_target_task_completion is run from an unrelated thread and
     therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
     the team could be gone already.  */
  if (team->nthreads > team->task_running_count)
    gomp_team_barrier_wake (&team->barrier, 1);
}
/* Signal that a target task TTASK has completed the asynchronously
   running phase and should be requeued as a task to handle the
   variable unmapping.  */

void
GOMP_PLUGIN_target_task_completion (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_task *task = ttask->task;
  struct gomp_team *team = ttask->team;

  gomp_mutex_lock (&team->task_lock);
  if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN)
    {
      ttask->state = GOMP_TARGET_TASK_FINISHED;
      gomp_mutex_unlock (&team->task_lock);
      return;
    }
  ttask->state = GOMP_TARGET_TASK_FINISHED;
  free (ttask->firstprivate_copies);
  gomp_target_task_completion (team, task);
  gomp_mutex_unlock (&team->task_lock);
}
static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);

/* Called for nowait target tasks.  */

bool
gomp_create_target_task (struct gomp_device_descr *devicep,
			 void (*fn) (void *), size_t mapnum, void **hostaddrs,
			 size_t *sizes, unsigned short *kinds,
			 unsigned int flags, void **depend, void **args,
			 enum gomp_target_task_state state)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return true;

  struct gomp_target_task *ttask;
  struct gomp_task *task;
  struct gomp_task *parent = thr->task;
  struct gomp_taskgroup *taskgroup = parent->taskgroup;
  bool do_wake;
  size_t depend_size = 0;
  uintptr_t depend_cnt = 0;
  size_t tgt_align = 0, tgt_size = 0;

  if (depend != NULL)
    {
      depend_cnt = (uintptr_t) depend[0];
      depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
    }
  if (fn)
    {
      /* GOMP_MAP_FIRSTPRIVATE mappings need to be copied first, as they
	 are firstprivate on the target task.  */
      size_t i;
      for (i = 0; i < mapnum; i++)
	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
	  {
	    size_t align = (size_t) 1 << (kinds[i] >> 8);
	    if (tgt_align < align)
	      tgt_align = align;
	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
	    tgt_size += sizes[i];
	  }
      if (tgt_align)
	tgt_size += tgt_align - 1;
      else
	tgt_size = 0;
    }
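
  /* At this point TGT_SIZE is a worst-case size for the firstprivate
     block: e.g. (illustrative numbers) two firstprivate objects with
     size 4/alignment 4 and size 8/alignment 8 need 4 bytes, 4 bytes of
     padding, then 8 bytes, i.e. 16 bytes, plus tgt_align - 1 == 7 bytes
     of slack to align the block itself.  */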
  task = gomp_malloc (sizeof (*task) + depend_size
		      + sizeof (*ttask)
		      + mapnum * (sizeof (void *) + sizeof (size_t)
				  + sizeof (unsigned short))
		      + tgt_size);
  gomp_init_task (task, parent, gomp_icv (false));
  task->priority = 0;
  task->kind = GOMP_TASK_WAITING;
  task->in_tied_task = parent->in_tied_task;
  task->taskgroup = taskgroup;
  ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
  ttask->devicep = devicep;
  ttask->fn = fn;
  ttask->mapnum = mapnum;
  ttask->args = args;
  memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
  ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
  memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
  ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
  memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
  if (tgt_align)
    {
      char *tgt = (char *) &ttask->kinds[mapnum];
      size_t i;
      uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
      if (al)
	tgt += tgt_align - al;
      tgt_size = 0;
      for (i = 0; i < mapnum; i++)
	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
	  {
	    size_t align = (size_t) 1 << (kinds[i] >> 8);
	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
	    memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	    ttask->hostaddrs[i] = tgt + tgt_size;
	    tgt_size = tgt_size + sizes[i];
	  }
    }
  ttask->flags = flags;
  ttask->state = state;
  ttask->task = task;
  ttask->team = team;
  ttask->firstprivate_copies = NULL;
  task->fn = NULL;
  task->fn_data = ttask;
  task->final_task = 0;
  gomp_mutex_lock (&team->task_lock);
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier)
			|| (taskgroup && taskgroup->cancelled), 0))
    {
      gomp_mutex_unlock (&team->task_lock);
      gomp_finish_task (task);
      free (task);
      return true;
    }
  if (depend_size)
    {
      gomp_task_handle_depend (task, parent, depend);
      if (task->num_dependees)
	{
	  if (taskgroup)
	    taskgroup->num_children++;
	  gomp_mutex_unlock (&team->task_lock);
	  return true;
	}
    }
  if (state == GOMP_TARGET_TASK_DATA)
    {
      gomp_task_run_post_handle_depend_hash (task);
      gomp_mutex_unlock (&team->task_lock);
      gomp_finish_task (task);
      free (task);
      return false;
    }
  if (taskgroup)
    taskgroup->num_children++;
  /* For async offloading, if we don't need to wait for dependencies,
     run the gomp_target_task_fn right away, essentially schedule the
     mapping part of the task in the current thread.  */
  if (devicep != NULL
      && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
			     PRIORITY_INSERT_END,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);
      if (taskgroup)
	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
			       task, 0, PRIORITY_INSERT_END,
			       /*adjust_parent_depends_on=*/false,
			       task->parent_depends_on);
      task->pnode[PQ_TEAM].next = NULL;
      task->pnode[PQ_TEAM].prev = NULL;
      task->kind = GOMP_TASK_TIED;
      ++team->task_count;
      gomp_mutex_unlock (&team->task_lock);

      thr->task = task;
      gomp_target_task_fn (task->fn_data);
      thr->task = parent;

      gomp_mutex_lock (&team->task_lock);
      task->kind = GOMP_TASK_ASYNC_RUNNING;
      /* If GOMP_PLUGIN_target_task_completion has run already
	 in between gomp_target_task_fn and the mutex lock,
	 perform the requeuing here.  */
      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	gomp_target_task_completion (team, task);
      else
	ttask->state = GOMP_TARGET_TASK_RUNNING;
      gomp_mutex_unlock (&team->task_lock);
      return true;
    }
  priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
			 PRIORITY_INSERT_BEGIN,
			 /*adjust_parent_depends_on=*/false,
			 task->parent_depends_on);
  if (taskgroup)
    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
			   PRIORITY_INSERT_BEGIN,
			   /*adjust_parent_depends_on=*/false,
			   task->parent_depends_on);
  priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
			 PRIORITY_INSERT_END,
			 /*adjust_parent_depends_on=*/false,
			 task->parent_depends_on);
  ++team->task_count;
  ++team->task_queued_count;
  gomp_team_barrier_set_task_pending (&team->barrier);
  do_wake = team->task_running_count + !parent->in_tied_task
	    < team->nthreads;
  gomp_mutex_unlock (&team->task_lock);
  if (do_wake)
    gomp_team_barrier_wake (&team->barrier, 1);
  return true;
}
/* Given a parent_depends_on task in LIST, move it to the front of its
   priority so it is run as soon as possible.

   Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.

   We rearrange the queue such that all parent_depends_on tasks are
   first, and last_parent_depends_on points to the last such task we
   rearranged.  For example, given the following tasks in a queue
   where PD[123] are the parent_depends_on tasks:

	task->children
	|
	V
	C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4

   We rearrange such that:

	task->children
	|	       +--- last_parent_depends_on
	|	       |
	V	       V
	PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4.  */

static void inline
priority_list_upgrade_task (struct priority_list *list,
			    struct priority_node *node)
{
  struct priority_node *last_parent_depends_on
    = list->last_parent_depends_on;
  if (last_parent_depends_on)
    {
      node->prev->next = node->next;
      node->next->prev = node->prev;
      node->prev = last_parent_depends_on;
      node->next = last_parent_depends_on->next;
      node->prev->next = node;
      node->next->prev = node;
    }
  else if (node != list->tasks)
    {
      node->prev->next = node->next;
      node->next->prev = node->prev;
      node->prev = list->tasks->prev;
      node->next = list->tasks;
      list->tasks = node;
      node->prev->next = node;
      node->next->prev = node;
    }
  list->last_parent_depends_on = node;
}
/* Given a parent_depends_on TASK in its parent's children_queue, move
   it to the front of its priority so it is run as soon as possible.

   PARENT is passed as an optimization.

   (This function could be defined in priority_queue.c, but we want it
   inlined, and putting it in priority_queue.h is not an option, given
   that gomp_task has not been properly defined at that point).  */

static void inline
priority_queue_upgrade_task (struct gomp_task *task,
			     struct gomp_task *parent)
{
  struct priority_queue *head = &parent->children_queue;
  struct priority_node *node = &task->pnode[PQ_CHILDREN];
#if _LIBGOMP_CHECKING_
  if (!task->parent_depends_on)
    gomp_fatal ("priority_queue_upgrade_task: task must be a "
		"parent_depends_on task");
  if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
    gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
#endif
  if (priority_queue_multi_p (head))
    {
      struct priority_list *list
	= priority_queue_lookup_priority (head, task->priority);
      priority_list_upgrade_task (list, node);
    }
  else
    priority_list_upgrade_task (&head->l, node);
}
/* Given a CHILD_TASK in LIST that is about to be executed, move it out of
   the way in LIST so that other tasks can be considered for
   execution.  LIST contains tasks of type TYPE.

   Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
   if applicable.  */

static void inline
priority_list_downgrade_task (enum priority_queue_type type,
			      struct priority_list *list,
			      struct gomp_task *child_task)
{
  struct priority_node *node = task_to_priority_node (type, child_task);
  if (list->tasks == node)
    list->tasks = node->next;
  else if (node->next != list->tasks)
    {
      /* The task in NODE is about to become TIED and TIED tasks
	 cannot come before WAITING tasks.  If we're about to
	 leave the queue in such an indeterminate state, rewire
	 things appropriately.  However, a TIED task at the end is
	 perfectly fine.  */
      struct gomp_task *next_task = priority_node_to_task (type, node->next);
      if (next_task->kind == GOMP_TASK_WAITING)
	{
	  /* Remove from list.  */
	  node->prev->next = node->next;
	  node->next->prev = node->prev;
	  /* Rewire at the end.  */
	  node->next = list->tasks;
	  node->prev = list->tasks->prev;
	  list->tasks->prev->next = node;
	  list->tasks->prev = node;
	}
    }

  /* If the current task is the last_parent_depends_on for its
     priority, adjust last_parent_depends_on appropriately.  */
  if (__builtin_expect (child_task->parent_depends_on, 0)
      && list->last_parent_depends_on == node)
    {
      struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
      if (node->prev != node
	  && prev_child->kind == GOMP_TASK_WAITING
	  && prev_child->parent_depends_on)
	list->last_parent_depends_on = node->prev;
      else
	{
	  /* There are no more parent_depends_on entries waiting
	     to run, clear the list.  */
	  list->last_parent_depends_on = NULL;
	}
    }
}
/* Given a TASK in HEAD that is about to be executed, move it out of
   the way so that other tasks can be considered for execution.  HEAD
   contains tasks of type TYPE.

   Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
   if applicable.

   (This function could be defined in priority_queue.c, but we want it
   inlined, and putting it in priority_queue.h is not an option, given
   that gomp_task has not been properly defined at that point).  */

static void inline
priority_queue_downgrade_task (enum priority_queue_type type,
			       struct priority_queue *head,
			       struct gomp_task *task)
{
#if _LIBGOMP_CHECKING_
  if (!priority_queue_task_in_queue_p (type, head, task))
    gomp_fatal ("Attempt to downgrade missing task %p", task);
#endif
  if (priority_queue_multi_p (head))
    {
      struct priority_list *list
	= priority_queue_lookup_priority (head, task->priority);
      priority_list_downgrade_task (type, list, task);
    }
  else
    priority_list_downgrade_task (type, &head->l, task);
}
/* Setup CHILD_TASK to execute.  This is done by setting the task to
   TIED, and updating all relevant queues so that CHILD_TASK is no
   longer chosen for scheduling.  Also, remove CHILD_TASK from the
   overall team task queue entirely.

   Return TRUE if task or its containing taskgroup has been
   cancelled.  */

static inline bool
gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
		   struct gomp_team *team)
{
#if _LIBGOMP_CHECKING_
  if (child_task->parent)
    priority_queue_verify (PQ_CHILDREN,
			   &child_task->parent->children_queue, true);
  if (child_task->taskgroup)
    priority_queue_verify (PQ_TASKGROUP,
			   &child_task->taskgroup->taskgroup_queue, false);
  priority_queue_verify (PQ_TEAM, &team->task_queue, false);
#endif

  /* Task is about to go tied, move it out of the way.  */
  if (parent)
    priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
				   child_task);

  /* Task is about to go tied, move it out of the way.  */
  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
  if (taskgroup)
    priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				   child_task);

  priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
			 MEMMODEL_RELAXED);
  child_task->pnode[PQ_TEAM].next = NULL;
  child_task->pnode[PQ_TEAM].prev = NULL;
  child_task->kind = GOMP_TASK_TIED;

  if (--team->task_queued_count == 0)
    gomp_team_barrier_clear_task_pending (&team->barrier);
  if ((gomp_team_barrier_cancelled (&team->barrier)
       || (taskgroup && taskgroup->cancelled))
      && !child_task->copy_ctors_done)
    return true;
  return false;
}
static void
gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
{
  struct gomp_task *parent = child_task->parent;
  size_t i;

  for (i = 0; i < child_task->depend_count; i++)
    if (!child_task->depend[i].redundant)
      {
	if (child_task->depend[i].next)
	  child_task->depend[i].next->prev = child_task->depend[i].prev;
	if (child_task->depend[i].prev)
	  child_task->depend[i].prev->next = child_task->depend[i].next;
	else
	  {
	    hash_entry_type *slot
	      = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
				NO_INSERT);
	    if (*slot != &child_task->depend[i])
	      abort ();
	    if (child_task->depend[i].next)
	      *slot = child_task->depend[i].next;
	    else
	      htab_clear_slot (parent->depend_hash, slot);
	  }
      }
}
/* After a CHILD_TASK has been run, adjust the dependency queue for
   each task that depends on CHILD_TASK, to record the fact that there
   is one less dependency to worry about.  If a task that depended on
   CHILD_TASK now has no dependencies, place it in the various queues
   so it gets scheduled to run.

   TEAM is the team to which CHILD_TASK belongs.  */

static size_t
gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
				     struct gomp_team *team)
{
  struct gomp_task *parent = child_task->parent;
  size_t i, count = child_task->dependers->n_elem, ret = 0;
  for (i = 0; i < count; i++)
    {
      struct gomp_task *task = child_task->dependers->elem[i];

      /* CHILD_TASK satisfies a dependency for TASK.  Keep track of
	 TASK's remaining dependencies.  Once TASK has no other
	 dependencies, put it into the various queues so it will get
	 scheduled for execution.  */
      if (--task->num_dependees != 0)
	continue;

      struct gomp_taskgroup *taskgroup = task->taskgroup;
      if (parent)
	{
	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
				 task, task->priority,
				 PRIORITY_INSERT_BEGIN,
				 /*adjust_parent_depends_on=*/true,
				 task->parent_depends_on);
	  if (parent->taskwait)
	    {
	      if (parent->taskwait->in_taskwait)
		{
		  /* One more task has had its dependencies met.
		     Inform any waiters.  */
		  parent->taskwait->in_taskwait = false;
		  gomp_sem_post (&parent->taskwait->taskwait_sem);
		}
	      else if (parent->taskwait->in_depend_wait)
		{
		  /* One more task has had its dependencies met.
		     Inform any waiters.  */
		  parent->taskwait->in_depend_wait = false;
		  gomp_sem_post (&parent->taskwait->taskwait_sem);
		}
	    }
	}
      if (taskgroup)
	{
	  priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				 task, task->priority,
				 PRIORITY_INSERT_BEGIN,
				 /*adjust_parent_depends_on=*/false,
				 task->parent_depends_on);
	  if (taskgroup->in_taskgroup_wait)
	    {
	      /* One more task has had its dependencies met.
		 Inform any waiters.  */
	      taskgroup->in_taskgroup_wait = false;
	      gomp_sem_post (&taskgroup->taskgroup_sem);
	    }
	}
      priority_queue_insert (PQ_TEAM, &team->task_queue,
			     task, task->priority,
			     PRIORITY_INSERT_END,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);
      ++team->task_count;
      ++team->task_queued_count;
      ++ret;
    }
  free (child_task->dependers);
  child_task->dependers = NULL;
  if (ret > 1)
    gomp_team_barrier_set_task_pending (&team->barrier);
  return ret;
}
static inline size_t
gomp_task_run_post_handle_depend (struct gomp_task *child_task,
				  struct gomp_team *team)
{
  if (child_task->depend_count == 0)
    return 0;

  /* If parent is gone already, the hash table is freed and nothing
     will use the hash table anymore, no need to remove anything from it.  */
  if (child_task->parent != NULL)
    gomp_task_run_post_handle_depend_hash (child_task);

  if (child_task->dependers == NULL)
    return 0;

  return gomp_task_run_post_handle_dependers (child_task, team);
}
/* Remove CHILD_TASK from its parent.  */

static inline void
gomp_task_run_post_remove_parent (struct gomp_task *child_task)
{
  struct gomp_task *parent = child_task->parent;
  if (parent == NULL)
    return;

  /* If this was the last task the parent was depending on,
     synchronize with gomp_task_maybe_wait_for_dependencies so it can
     clean up and return.  */
  if (__builtin_expect (child_task->parent_depends_on, 0)
      && --parent->taskwait->n_depend == 0
      && parent->taskwait->in_depend_wait)
    {
      parent->taskwait->in_depend_wait = false;
      gomp_sem_post (&parent->taskwait->taskwait_sem);
    }

  if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
			     child_task, MEMMODEL_RELEASE)
      && parent->taskwait && parent->taskwait->in_taskwait)
    {
      parent->taskwait->in_taskwait = false;
      gomp_sem_post (&parent->taskwait->taskwait_sem);
    }
  child_task->pnode[PQ_CHILDREN].next = NULL;
  child_task->pnode[PQ_CHILDREN].prev = NULL;
}
/* Remove CHILD_TASK from its taskgroup.  */

static inline void
gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
{
  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
  if (taskgroup == NULL)
    return;
  bool empty = priority_queue_remove (PQ_TASKGROUP,
				      &taskgroup->taskgroup_queue,
				      child_task, MEMMODEL_RELAXED);
  child_task->pnode[PQ_TASKGROUP].next = NULL;
  child_task->pnode[PQ_TASKGROUP].prev = NULL;
  if (taskgroup->num_children > 1)
    --taskgroup->num_children;
  else
    {
      /* We access taskgroup->num_children in GOMP_taskgroup_end
	 outside of the task lock mutex region, so
	 need a release barrier here to ensure memory
	 written by child_task->fn above is flushed
	 before the 0 is written.  */
      __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
    }
  if (empty && taskgroup->in_taskgroup_wait)
    {
      taskgroup->in_taskgroup_wait = false;
      gomp_sem_post (&taskgroup->taskgroup_sem);
    }
}
void
gomp_barrier_handle_tasks (gomp_barrier_state_t state)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  gomp_mutex_lock (&team->task_lock);
  if (gomp_barrier_last_thread (state))
    {
      if (team->task_count == 0)
	{
	  gomp_team_barrier_done (&team->barrier, state);
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_team_barrier_wake (&team->barrier, 0);
	  return;
	}
      gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
    }

  while (1)
    {
      bool cancelled = false;
      if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
	{
	  bool ignored;
	  child_task
	    = priority_queue_next_task (PQ_TEAM, &team->task_queue,
					PQ_IGNORED, NULL,
					&ignored);
	  cancelled = gomp_task_run_pre (child_task, child_task->parent,
					 team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	  team->task_running_count++;
	  child_task->in_tied_task = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  team->task_running_count--;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	return;
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  gomp_task_run_post_remove_parent (child_task);
	  gomp_clear_parent (&child_task->children_queue);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  if (!cancelled)
	    team->task_running_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	  if (--team->task_count == 0
	      && gomp_team_barrier_waiting_for_tasks (&team->barrier))
	    {
	      gomp_team_barrier_done (&team->barrier, state);
	      gomp_mutex_unlock (&team->task_lock);
	      gomp_team_barrier_wake (&team->barrier, 0);
	      gomp_mutex_lock (&team->task_lock);
	    }
	}
    }
}
/* Called when encountering a taskwait directive.

   Wait for all children of the current task.  */

void
GOMP_taskwait (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  struct gomp_taskwait taskwait;
  int do_wake = 0;

  /* The acquire barrier on load of task->children here synchronizes
     with the write of a NULL in gomp_task_run_post_remove_parent.  It is
     not necessary that we synchronize with other non-NULL writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskwait.  */
  if (task == NULL
      || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
    return;

  memset (&taskwait, 0, sizeof (taskwait));
  bool child_q = false;
  gomp_mutex_lock (&team->task_lock);
  while (1)
    {
      bool cancelled = false;
      if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
	{
	  bool destroy_taskwait = task->taskwait != NULL;
	  task->taskwait = NULL;
	  gomp_mutex_unlock (&team->task_lock);
	  if (to_free)
	    {
	      gomp_finish_task (to_free);
	      free (to_free);
	    }
	  if (destroy_taskwait)
	    gomp_sem_destroy (&taskwait.taskwait_sem);
	  return;
	}
      struct gomp_task *next_task
	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
				    PQ_TEAM, &team->task_queue, &child_q);
      if (next_task->kind == GOMP_TASK_WAITING)
	{
	  child_task = next_task;
	  cancelled
	    = gomp_task_run_pre (child_task, task, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	{
	  /* All tasks we are waiting for are either running in other
	     threads, or they are tasks that have not had their
	     dependencies met (so they're not even in the queue).  Wait
	     for them.  */
	  if (task->taskwait == NULL)
	    {
	      taskwait.in_depend_wait = false;
	      gomp_sem_init (&taskwait.taskwait_sem, 0);
	      task->taskwait = &taskwait;
	    }
	  taskwait.in_taskwait = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);

	  if (child_q)
	    {
	      priority_queue_remove (PQ_CHILDREN, &task->children_queue,
				     child_task, MEMMODEL_RELAXED);
	      child_task->pnode[PQ_CHILDREN].next = NULL;
	      child_task->pnode[PQ_CHILDREN].prev = NULL;
	    }

	  gomp_clear_parent (&child_task->children_queue);

	  gomp_task_run_post_remove_taskgroup (child_task);

	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }
}
/* An undeferred task is about to run.  Wait for all tasks that this
   undeferred task depends on.

   This is done by first putting all known ready dependencies
   (dependencies that have their own dependencies met) at the top of
   the scheduling queues.  Then we iterate through these imminently
   ready tasks (and possibly other high priority tasks), and run them.
   If we run out of ready dependencies to execute, we either wait for
   the remaining dependencies to finish, or wait for them to get
   scheduled so we can run them.

   DEPEND is as in GOMP_task.  */

void
gomp_task_maybe_wait_for_dependencies (void **depend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;
  struct gomp_team *team = thr->ts.team;
  struct gomp_task_depend_entry elem, *ent = NULL;
  struct gomp_taskwait taskwait;
  size_t ndepend = (uintptr_t) depend[0];
  size_t nout = (uintptr_t) depend[1];
  size_t i;
  size_t num_awaited = 0;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  gomp_mutex_lock (&team->task_lock);
  for (i = 0; i < ndepend; i++)
    {
      elem.addr = depend[i + 2];
      ent = htab_find (task->depend_hash, &elem);
      for (; ent; ent = ent->next)
	if (i >= nout && ent->is_in)
	  continue;
	else
	  {
	    struct gomp_task *tsk = ent->task;
	    if (!tsk->parent_depends_on)
	      {
		tsk->parent_depends_on = true;
		++num_awaited;
		/* If dependency TSK itself has no dependencies and is
		   ready to run, move it up front so that we run it as
		   soon as possible.  */
		if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
		  priority_queue_upgrade_task (tsk, task);
	      }
	  }
    }
  if (num_awaited == 0)
    {
      gomp_mutex_unlock (&team->task_lock);
      return;
    }

  memset (&taskwait, 0, sizeof (taskwait));
  taskwait.n_depend = num_awaited;
  gomp_sem_init (&taskwait.taskwait_sem, 0);
  task->taskwait = &taskwait;

  while (1)
    {
      bool cancelled = false;
      if (taskwait.n_depend == 0)
	{
	  task->taskwait = NULL;
	  gomp_mutex_unlock (&team->task_lock);
	  if (to_free)
	    {
	      gomp_finish_task (to_free);
	      free (to_free);
	    }
	  gomp_sem_destroy (&taskwait.taskwait_sem);
	  return;
	}

      /* Theoretically when we have multiple priorities, we should
	 choose between the highest priority item in
	 task->children_queue and team->task_queue here, so we should
	 use priority_queue_next_task().  However, since we are
	 running an undeferred task, perhaps that makes all tasks it
	 depends on undeferred, thus a priority of INF?  This would
	 make it unnecessary to take anything into account here,
	 but the dependencies.

	 On the other hand, if we want to use priority_queue_next_task(),
	 care should be taken to only use priority_queue_remove()
	 below if the task was actually removed from the children
	 queue.  */
      bool ignored;
      struct gomp_task *next_task
	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
				    PQ_IGNORED, NULL, &ignored);

      if (next_task->kind == GOMP_TASK_WAITING)
	{
	  child_task = next_task;
	  cancelled
	    = gomp_task_run_pre (child_task, task, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	/* All tasks we are waiting for are either running in other
	   threads, or they are tasks that have not had their
	   dependencies met (so they're not even in the queue).  Wait
	   for them.  */
	taskwait.in_depend_wait = true;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  if (child_task->parent_depends_on)
	    --taskwait.n_depend;

	  priority_queue_remove (PQ_CHILDREN, &task->children_queue,
				 child_task, MEMMODEL_RELAXED);
	  child_task->pnode[PQ_CHILDREN].next = NULL;
	  child_task->pnode[PQ_CHILDREN].prev = NULL;

	  gomp_clear_parent (&child_task->children_queue);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }
}
/* Called when encountering a taskyield directive.  */

void
GOMP_taskyield (void)
{
  /* Nothing at the moment.  */
}
void
GOMP_taskgroup_start (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_taskgroup *taskgroup;

  /* If team is NULL, all tasks are executed as
     GOMP_TASK_UNDEFERRED tasks and thus all children tasks of
     taskgroup and their descendant tasks will be finished
     by the time GOMP_taskgroup_end is called.  */
  if (team == NULL)
    return;
  taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup));
  taskgroup->prev = task->taskgroup;
  priority_queue_init (&taskgroup->taskgroup_queue);
  taskgroup->in_taskgroup_wait = false;
  taskgroup->cancelled = false;
  taskgroup->num_children = 0;
  gomp_sem_init (&taskgroup->taskgroup_sem, 0);
  task->taskgroup = taskgroup;
}
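
/* As an illustration: user code such as

     #pragma omp taskgroup
     {
       ... create tasks ...
     }

   is lowered to a GOMP_taskgroup_start () call before the region and a
   GOMP_taskgroup_end () call after it; the latter waits for all tasks
   created in the region and their descendants.  */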
void
GOMP_taskgroup_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_taskgroup *taskgroup;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  if (team == NULL)
    return;
  taskgroup = task->taskgroup;
  if (__builtin_expect (taskgroup == NULL, 0)
      && thr->ts.level == 0)
    {
      /* This can happen if GOMP_taskgroup_start is called when
	 thr->ts.team == NULL, but inside of the taskgroup there
	 is #pragma omp target nowait that creates an implicit
	 team with a single thread.  In this case, we want to wait
	 for all outstanding tasks in this team.  */
      gomp_team_barrier_wait (&team->barrier);
      return;
    }

  /* The acquire barrier on load of taskgroup->num_children here
     synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
     It is not necessary that we synchronize with other non-0 writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskgroup_end.  */
  if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
    goto finish;

  bool unused;
  gomp_mutex_lock (&team->task_lock);
  while (1)
    {
      bool cancelled = false;
      if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
				  MEMMODEL_RELAXED))
	{
	  if (taskgroup->num_children)
	    {
	      if (priority_queue_empty_p (&task->children_queue,
					  MEMMODEL_RELAXED))
		goto do_wait;
	      child_task
		= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
					    PQ_TEAM, &team->task_queue,
					    &unused);
	    }
	  else
	    {
	      gomp_mutex_unlock (&team->task_lock);
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		}
	      goto finish;
	    }
	}
      else
	child_task
	  = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				      PQ_TEAM, &team->task_queue, &unused);
      if (child_task->kind == GOMP_TASK_WAITING)
	{
	  cancelled
	    = gomp_task_run_pre (child_task, child_task->parent, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	{
	  child_task = NULL;
	do_wait:
	  /* All tasks we are waiting for are either running in other
	     threads, or they are tasks that have not had their
	     dependencies met (so they're not even in the queue).  Wait
	     for them.  */
	  taskgroup->in_taskgroup_wait = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	gomp_sem_wait (&taskgroup->taskgroup_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  gomp_task_run_post_remove_parent (child_task);
	  gomp_clear_parent (&child_task->children_queue);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }

 finish:
  task->taskgroup = taskgroup->prev;
  gomp_sem_destroy (&taskgroup->taskgroup_sem);
  free (taskgroup);
}
int
omp_in_final (void)
{
  struct gomp_thread *thr = gomp_thread ();
  return thr->task && thr->task->final_task;
}

ialias (omp_in_final)