xfail dg-final "Sunk statements: 5" on hppa*64*-*-*
[official-gcc.git] / libgomp / task.c
blobe2d7f21de62b3fb182a55adbfd2c47659a49cf7d
1 /* Copyright (C) 2007-2024 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file handles the maintenance of tasks in response to task
27 creation and termination. */
29 #include "libgomp.h"
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33 #include "gomp-constants.h"
35 typedef struct gomp_task_depend_entry *hash_entry_type;
/* Allocation hook: hand back SIZE bytes obtained from gomp_malloc.
   Presumably consumed by hashtab.h, which is included after these
   hooks - confirm there.  */

static inline void *
htab_alloc (size_t size)
{
  void *mem = gomp_malloc (size);
  return mem;
}
/* Deallocation hook paired with htab_alloc: release PTR with free.  */

static inline void
htab_free (void *ptr)
{
  free (ptr);
}
49 #include "hashtab.h"
51 static inline hashval_t
52 htab_hash (hash_entry_type element)
54 return hash_pointer (element->addr);
57 static inline bool
58 htab_eq (hash_entry_type x, hash_entry_type y)
60 return x->addr == y->addr;
63 /* Create a new task data structure. */
65 void
66 gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
67 struct gomp_task_icv *prev_icv)
69 /* It would seem that using memset here would be a win, but it turns
70 out that partially filling gomp_task allows us to keep the
71 overhead of task creation low. In the nqueens-1.c test, for a
72 sufficiently large N, we drop the overhead from 5-6% to 1%.
74 Note, the nqueens-1.c test in serial mode is a good test to
75 benchmark the overhead of creating tasks as there are millions of
76 tiny tasks created that all run undeferred. */
77 task->parent = parent_task;
78 priority_queue_init (&task->children_queue);
79 task->taskgroup = NULL;
80 task->dependers = NULL;
81 task->depend_hash = NULL;
82 task->taskwait = NULL;
83 task->depend_all_memory = NULL;
84 task->depend_count = 0;
85 task->completion_sem = NULL;
86 task->deferred_p = false;
87 task->icv = *prev_icv;
88 task->kind = GOMP_TASK_IMPLICIT;
89 task->in_tied_task = false;
90 task->final_task = false;
91 task->copy_ctors_done = false;
92 task->parent_depends_on = false;
95 /* Clean up a task, after completing it. */
97 void
98 gomp_end_task (void)
100 struct gomp_thread *thr = gomp_thread ();
101 struct gomp_task *task = thr->task;
103 gomp_finish_task (task);
104 thr->task = task->parent;
107 /* Clear the parent field of every task in LIST. */
109 static inline void
110 gomp_clear_parent_in_list (struct priority_list *list)
112 struct priority_node *p = list->tasks;
113 if (p)
116 priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
117 p = p->next;
119 while (p != list->tasks);
122 /* Splay tree version of gomp_clear_parent_in_list.
124 Clear the parent field of every task in NODE within SP, and free
125 the node when done. */
127 static void
128 gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
130 if (!node)
131 return;
132 prio_splay_tree_node left = node->left, right = node->right;
133 gomp_clear_parent_in_list (&node->key.l);
134 #if _LIBGOMP_CHECKING_
135 memset (node, 0xaf, sizeof (*node));
136 #endif
137 /* No need to remove the node from the tree. We're nuking
138 everything, so just free the nodes and our caller can clear the
139 entire splay tree. */
140 free (node);
141 gomp_clear_parent_in_tree (sp, left);
142 gomp_clear_parent_in_tree (sp, right);
145 /* Clear the parent field of every task in Q and remove every task
146 from Q. */
148 static inline void
149 gomp_clear_parent (struct priority_queue *q)
151 if (priority_queue_multi_p (q))
153 gomp_clear_parent_in_tree (&q->t, q->t.root);
154 /* All the nodes have been cleared in gomp_clear_parent_in_tree.
155 No need to remove anything. We can just nuke everything. */
156 q->t.root = NULL;
158 else
159 gomp_clear_parent_in_list (&q->l);
/* Reviewer summary: copies the dependence addresses from DEPEND into
   TASK->depend[], then links TASK into PARENT's dependency hash table and
   appends TASK to the dependers vector of each earlier task it must wait
   for.  TASK->num_dependees is incremented once per such vector entry.  */
162 /* Helper function for GOMP_task and gomp_create_target_task.
164 For a TASK with in/out dependencies, fill in the various dependency
165 queues. PARENT is the parent of said task. DEPEND is as in
166 GOMP_task. */
168 static void
169 gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
170 void **depend)
172 size_t ndepend = (uintptr_t) depend[0];
173 size_t i;
174 hash_entry_type ent;
175 bool all_memory = false;
177 if (ndepend)
179 /* depend[0] is total # */
180 size_t nout = (uintptr_t) depend[1]; /* # of out: and inout: */
181 /* ndepend - nout is # of in: */
182 for (i = 0; i < ndepend; i++)
184 task->depend[i].addr = depend[2 + i];
185 task->depend[i].is_in = i >= nout;
/* A NULL address among the out/inout entries sets all_memory.  */
186 all_memory |= i < nout && depend[2 + i] == NULL;
189 else
191 ndepend = (uintptr_t) depend[1]; /* total # */
192 size_t nout = (uintptr_t) depend[2]; /* # of out: and inout: */
193 size_t nmutexinoutset = (uintptr_t) depend[3]; /* # of mutexinoutset: */
194 /* For now we treat mutexinoutset like out, which is compliant, but
195 inefficient. */
196 size_t nin = (uintptr_t) depend[4]; /* # of in: */
197 /* ndepend - nout - nmutexinoutset - nin is # of depobjs */
198 size_t normal = nout + nmutexinoutset + nin;
199 size_t n = 0;
200 bool has_in = false;
/* Pass 1 over the omp_depend_t objects: out/inout/mutexinoutset objects
   are stored immediately with is_in == 0; in/inoutset objects only set
   has_in here and are stored by the later pass.  */
201 for (i = normal; i < ndepend; i++)
203 void **d = (void **) (uintptr_t) depend[5 + i];
204 switch ((uintptr_t) d[1])
206 case GOMP_DEPEND_OUT:
207 case GOMP_DEPEND_INOUT:
208 all_memory |= d[0] == NULL;
209 break;
210 case GOMP_DEPEND_MUTEXINOUTSET:
211 break;
212 case GOMP_DEPEND_IN:
213 case GOMP_DEPEND_INOUTSET:
214 has_in = true;
215 continue;
216 default:
217 gomp_fatal ("unknown omp_depend_t dependence type %d",
218 (int) (uintptr_t) d[1]);
220 task->depend[n].addr = d[0];
221 task->depend[n++].is_in = 0;
/* Next the plain address entries; those past the out and mutexinoutset
   counts are marked as in.  */
223 for (i = 0; i < normal; i++)
225 task->depend[n].addr = depend[5 + i];
226 task->depend[n++].is_in = i >= nout + nmutexinoutset;
/* Pass 2 over the omp_depend_t objects: append the in/inoutset ones;
   is_in is 1 for in and 2 for inoutset.  */
228 if (has_in)
229 for (i = normal; i < ndepend; i++)
231 void **d = (void **) (uintptr_t) depend[5 + i];
232 if ((uintptr_t) d[1] != GOMP_DEPEND_IN
233 && (uintptr_t) d[1] != GOMP_DEPEND_INOUTSET)
234 continue;
235 task->depend[n].addr = d[0];
236 task->depend[n++].is_in
237 = 1 + ((uintptr_t) d[1] == GOMP_DEPEND_INOUTSET);
240 task->num_dependees = 0;
/* If PARENT has a task recorded in depend_all_memory and TASK has any
   dependence at all, TASK becomes a depender of that task.  A dependers
   vector starts with room for 6 tasks and grows to 2 * allocated + 2
   when full.  */
241 if (__builtin_expect (parent->depend_all_memory && ndepend, false))
243 struct gomp_task *tsk = parent->depend_all_memory;
244 if (tsk->dependers == NULL)
246 tsk->dependers
247 = gomp_malloc (sizeof (struct gomp_dependers_vec)
248 + 6 * sizeof (struct gomp_task *));
249 tsk->dependers->n_elem = 1;
250 tsk->dependers->allocated = 6;
251 tsk->dependers->elem[0] = task;
253 else
255 if (tsk->dependers->n_elem == tsk->dependers->allocated)
257 tsk->dependers->allocated
258 = tsk->dependers->allocated * 2 + 2;
259 tsk->dependers
260 = gomp_realloc (tsk->dependers,
261 sizeof (struct gomp_dependers_vec)
262 + (tsk->dependers->allocated
263 * sizeof (struct gomp_task *)));
265 tsk->dependers->elem[tsk->dependers->n_elem++] = task;
267 task->num_dependees++;
269 if (__builtin_expect (all_memory, false))
271 /* A task with depend(inout: omp_all_memory) depends on all previous
272 sibling tasks which have any dependencies and all later sibling
273 tasks which have any dependencies depend on it. */
274 task->depend_count = 1;
275 task->depend[0].addr = NULL;
276 task->depend[0].next = NULL;
277 task->depend[0].prev = NULL;
278 task->depend[0].task = task;
279 task->depend[0].redundant = true;
280 task->depend[0].redundant_out = false;
281 if (parent->depend_hash)
283 /* Inlined htab_traverse + htab_clear. All newer siblings can
284 just depend on this task. Add dependencies on all previous
285 sibling tasks with dependencies and make them redundant and
286 clear the hash table. */
287 hash_entry_type *slot = &parent->depend_hash->entries[0];
288 hash_entry_type *end = slot + htab_size (parent->depend_hash);
289 for (; slot != end; ++slot)
291 if (*slot == HTAB_EMPTY_ENTRY)
292 continue;
293 if (*slot != HTAB_DELETED_ENTRY)
295 for (ent = *slot; ent; ent = ent->next)
297 struct gomp_task *tsk = ent->task;
299 if (ent->redundant_out)
300 break;
302 ent->redundant = true;
303 if (tsk->dependers == NULL)
305 tsk->dependers
306 = gomp_malloc (sizeof (struct gomp_dependers_vec)
307 + 6 * sizeof (struct gomp_task *));
308 tsk->dependers->n_elem = 1;
309 tsk->dependers->allocated = 6;
310 tsk->dependers->elem[0] = task;
311 task->num_dependees++;
312 continue;
314 /* We already have some other dependency on tsk from
315 earlier depend clause. */
316 else if (tsk->dependers->n_elem
317 && (tsk->dependers->elem[tsk->dependers->n_elem
318 - 1] == task))
319 continue;
320 else if (tsk->dependers->n_elem
321 == tsk->dependers->allocated)
323 tsk->dependers->allocated
324 = tsk->dependers->allocated * 2 + 2;
325 tsk->dependers
326 = gomp_realloc (tsk->dependers,
327 sizeof (struct gomp_dependers_vec)
328 + (tsk->dependers->allocated
329 * sizeof (struct gomp_task *)));
331 tsk->dependers->elem[tsk->dependers->n_elem++] = task;
332 task->num_dependees++;
/* Entries left after the first redundant_out one are only marked
   redundant; no dependence is recorded for them.  */
334 while (ent)
336 ent->redundant = true;
337 ent = ent->next;
340 *slot = HTAB_EMPTY_ENTRY;
/* Every slot was reset above; a table of at most 32 slots is kept and
   only has its counters zeroed.  */
342 if (htab_size (parent->depend_hash) <= 32)
344 parent->depend_hash->n_elements = 0;
345 parent->depend_hash->n_deleted = 0;
347 else
349 /* Shrink the hash table if it would be too large.
350 We don't want to walk e.g. megabytes of empty hash
351 table for every depend(inout: omp_all_memory). */
352 free (parent->depend_hash);
353 parent->depend_hash = htab_create (12);
/* Remember TASK as the parent's current omp_all_memory task; the
   per-address hashing below is skipped for it.  */
356 parent->depend_all_memory = task;
357 return;
/* Normal case: hash every dependence address.  A missing table is created
   with a size argument of 2 * ndepend, but at least 12.  */
359 task->depend_count = ndepend;
360 if (parent->depend_hash == NULL)
361 parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
362 for (i = 0; i < ndepend; i++)
364 task->depend[i].next = NULL;
365 task->depend[i].prev = NULL;
366 task->depend[i].task = task;
367 task->depend[i].redundant = false;
368 task->depend[i].redundant_out = false;
370 hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
371 &task->depend[i], INSERT);
372 hash_entry_type out = NULL, last = NULL;
373 if (*slot)
375 /* If multiple depends on the same task are the same, all but the
376 first one are redundant. As inout/out come first, if any of them
377 is inout/out, it will win, which is the right semantics. */
378 if ((*slot)->task == task)
380 task->depend[i].redundant = true;
381 continue;
383 for (ent = *slot; ent; ent = ent->next)
385 if (ent->redundant_out)
386 break;
388 last = ent;
390 /* depend(in:...) doesn't depend on earlier depend(in:...).
391 Similarly depend(inoutset:...) doesn't depend on earlier
392 depend(inoutset:...). */
393 if (task->depend[i].is_in && task->depend[i].is_in == ent->is_in)
394 continue;
396 if (!ent->is_in)
397 out = ent;
399 struct gomp_task *tsk = ent->task;
400 if (tsk->dependers == NULL)
402 tsk->dependers
403 = gomp_malloc (sizeof (struct gomp_dependers_vec)
404 + 6 * sizeof (struct gomp_task *));
405 tsk->dependers->n_elem = 1;
406 tsk->dependers->allocated = 6;
407 tsk->dependers->elem[0] = task;
408 task->num_dependees++;
409 continue;
411 /* We already have some other dependency on tsk from earlier
412 depend clause. */
413 else if (tsk->dependers->n_elem
414 && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
415 == task))
416 continue;
417 else if (tsk->dependers->n_elem == tsk->dependers->allocated)
419 tsk->dependers->allocated
420 = tsk->dependers->allocated * 2 + 2;
421 tsk->dependers
422 = gomp_realloc (tsk->dependers,
423 sizeof (struct gomp_dependers_vec)
424 + (tsk->dependers->allocated
425 * sizeof (struct gomp_task *)));
427 tsk->dependers->elem[tsk->dependers->n_elem++] = task;
428 task->num_dependees++;
/* Push the new entry at the head of the chain for this address.  */
430 task->depend[i].next = *slot;
431 (*slot)->prev = &task->depend[i];
433 *slot = &task->depend[i];
435 /* There is no need to store more than one depend({,in}out:) task per
436 address in the hash table chain for the purpose of creation of
437 deferred tasks, because each out depends on all earlier outs, thus it
438 is enough to record just the last depend({,in}out:). For depend(in:),
439 we need to keep all of the previous ones not terminated yet, because
440 a later depend({,in}out:) might need to depend on all of them. So, if
441 the new task's clause is depend({,in}out:), we know there is at most
442 one other depend({,in}out:) clause in the list (out). For
443 non-deferred tasks we want to see all outs, so they are moved to the
444 end of the chain, after first redundant_out entry all following
445 entries should be redundant_out. */
446 if (!task->depend[i].is_in && out)
448 if (out != last)
/* Unlink OUT and re-insert it directly after LAST.  */
450 out->next->prev = out->prev;
451 out->prev->next = out->next;
452 out->next = last->next;
453 out->prev = last;
454 last->next = out;
455 if (out->next)
456 out->next->prev = out;
458 out->redundant_out = true;
/* Task body that does nothing; used for an empty task such as
   taskwait nowait depend.  DATA is ignored.  */

static void
empty_task (void *data __attribute__((unused)))
{
}
470 static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);
471 static inline size_t gomp_task_run_post_handle_depend (struct gomp_task *,
472 struct gomp_team *);
474 /* Called when encountering an explicit task directive. If IF_CLAUSE is
475 false, then we must not delay in executing the task. If UNTIED is true,
476 then the task may be executed by any member of the team.
478 DEPEND is an array containing:
479 if depend[0] is non-zero, then:
480 depend[0]: number of depend elements.
481 depend[1]: number of depend elements of type "out/inout".
482 depend[2..N+1]: address of [1..N]th depend element.
483 otherwise, when depend[0] is zero, then:
484 depend[1]: number of depend elements.
485 depend[2]: number of depend elements of type "out/inout".
486 depend[3]: number of depend elements of type "mutexinoutset".
487 depend[4]: number of depend elements of type "in".
488 depend[5..4+depend[2]+depend[3]+depend[4]]: address of depend elements
489 depend[5+depend[2]+depend[3]+depend[4]..4+depend[1]]: address of
490 omp_depend_t objects. */
/* Reviewer notes on the remaining parameters, as used below: FN is the
   task body and DATA its argument block of ARG_SIZE bytes with alignment
   ARG_ALIGN; CPYFN, when non-NULL, copies DATA into the task's own
   argument block instead of memcpy; PRIORITY_ARG is honoured only when
   GOMP_TASK_FLAG_PRIORITY is set in FLAGS; DETACH is written through only
   when GOMP_TASK_FLAG_DETACH is set.  */
492 void
493 GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
494 long arg_size, long arg_align, bool if_clause, unsigned flags,
495 void **depend, int priority_arg, void *detach)
497 struct gomp_thread *thr = gomp_thread ();
498 struct gomp_team *team = thr->ts.team;
499 int priority = 0;
501 #ifdef HAVE_BROKEN_POSIX_SEMAPHORES
502 /* If pthread_mutex_* is used for omp_*lock*, then each task must be
503 tied to one thread all the time. This means UNTIED tasks must be
504 tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
505 might be running on different thread than FN. */
506 if (cpyfn)
507 if_clause = false;
508 flags &= ~GOMP_TASK_FLAG_UNTIED;
509 #endif
511 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
512 if (__builtin_expect (gomp_cancel_var, 0) && team)
514 if (gomp_team_barrier_cancelled (&team->barrier))
515 return;
516 if (thr->task->taskgroup)
518 if (thr->task->taskgroup->cancelled)
519 return;
520 if (thr->task->taskgroup->workshare
521 && thr->task->taskgroup->prev
522 && thr->task->taskgroup->prev->cancelled)
523 return;
/* The requested priority is clamped to gomp_max_task_priority_var.  */
527 if (__builtin_expect ((flags & GOMP_TASK_FLAG_PRIORITY) != 0, 0))
529 priority = priority_arg;
530 if (priority > gomp_max_task_priority_var)
531 priority = gomp_max_task_priority_var;
/* Run the task immediately (undeferred) when the if clause is false, there
   is no team, the encountering task is final, or team->task_count exceeds
   64 * team->nthreads.  */
534 if (!if_clause || team == NULL
535 || (thr->task && thr->task->final_task)
536 || team->task_count > 64 * team->nthreads)
538 struct gomp_task task;
539 gomp_sem_t completion_sem;
541 /* If there are depend clauses and earlier deferred sibling tasks
542 with depend clauses, check if there isn't a dependency. If there
543 is, we need to wait for them. There is no need to handle
544 depend clauses for non-deferred tasks other than this, because
545 the parent task is suspended until the child task finishes and thus
546 it can't start further child tasks. */
547 if ((flags & GOMP_TASK_FLAG_DEPEND)
548 && thr->task && thr->task->depend_hash)
549 gomp_task_maybe_wait_for_dependencies (depend);
551 gomp_init_task (&task, thr->task, gomp_icv (false));
552 task.kind = GOMP_TASK_UNDEFERRED;
553 task.final_task = (thr->task && thr->task->final_task)
554 || (flags & GOMP_TASK_FLAG_FINAL);
555 task.priority = priority;
/* For a detachable task the address of the stack task object is stored
   through DETACH and, when DATA is non-NULL, into the start of DATA.  */
557 if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
559 gomp_sem_init (&completion_sem, 0);
560 task.completion_sem = &completion_sem;
561 *(void **) detach = &task;
562 if (data)
563 *(void **) data = &task;
565 gomp_debug (0, "Thread %d: new event: %p\n",
566 thr->ts.team_id, &task);
569 if (thr->task)
571 task.in_tied_task = thr->task->in_tied_task;
572 task.taskgroup = thr->task->taskgroup;
574 thr->task = &task;
575 if (__builtin_expect (cpyfn != NULL, 0))
/* Over-allocate by arg_align - 1 bytes so ARG can be rounded up to an
   arg_align boundary inside BUF.  */
577 char buf[arg_size + arg_align - 1];
578 char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
579 & ~(uintptr_t) (arg_align - 1));
580 cpyfn (arg, data);
581 fn (arg);
583 else
584 fn (data);
/* A detachable task blocks here until its completion semaphore is
   posted.  */
586 if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
588 gomp_sem_wait (&completion_sem);
589 gomp_sem_destroy (&completion_sem);
592 /* Access to "children" is normally done inside a task_lock
593 mutex region, but the only way this particular task.children
594 can be set is if this thread's task work function (fn)
595 creates children. So since the setter is *this* thread, we
596 need no barriers here when testing for non-NULL. We can have
597 task.children set by the current thread then changed by a
598 child thread, but seeing a stale non-NULL value is not a
599 problem. Once past the task_lock acquisition, this thread
600 will see the real value of task.children. */
601 if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
603 gomp_mutex_lock (&team->task_lock);
604 gomp_clear_parent (&task.children_queue);
605 gomp_mutex_unlock (&team->task_lock);
607 gomp_end_task ();
609 else
611 struct gomp_task *task;
612 struct gomp_task *parent = thr->task;
613 struct gomp_taskgroup *taskgroup = parent->taskgroup;
614 char *arg;
615 bool do_wake;
616 size_t depend_size = 0;
/* One allocation holds the task, its depend entries and the argument
   block; ARG is the first arg_align-aligned address after the depend
   entries.  */
618 if (flags & GOMP_TASK_FLAG_DEPEND)
619 depend_size = ((uintptr_t) (depend[0] ? depend[0] : depend[1])
620 * sizeof (struct gomp_task_depend_entry));
621 task = gomp_malloc (sizeof (*task) + depend_size
622 + arg_size + arg_align - 1);
623 arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
624 & ~(uintptr_t) (arg_align - 1));
625 gomp_init_task (task, parent, gomp_icv (false));
626 task->priority = priority;
627 task->kind = GOMP_TASK_UNDEFERRED;
628 task->in_tied_task = parent->in_tied_task;
629 task->taskgroup = taskgroup;
630 task->deferred_p = true;
631 if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
633 task->detach_team = team;
635 *(void **) detach = task;
636 if (data)
637 *(void **) data = task;
639 gomp_debug (0, "Thread %d: new event: %p\n", thr->ts.team_id, task);
/* The new task is made the thread's current task while the arguments are
   copied; the parent is restored afterwards and the task then becomes
   GOMP_TASK_WAITING.  */
641 thr->task = task;
642 if (cpyfn)
644 cpyfn (arg, data);
645 task->copy_ctors_done = true;
647 else
648 memcpy (arg, data, arg_size);
649 thr->task = parent;
650 task->kind = GOMP_TASK_WAITING;
651 task->fn = fn;
652 task->fn_data = arg;
/* NOTE(review): the shift presumably reduces the GOMP_TASK_FLAG_FINAL bit
   to 0/1 - confirm the flag value in gomp-constants.h.  */
653 task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
654 gomp_mutex_lock (&team->task_lock);
655 /* If parallel or taskgroup has been cancelled, don't start new
656 tasks. */
/* The cancellation check is skipped once CPYFN has run
   (copy_ctors_done).  */
657 if (__builtin_expect (gomp_cancel_var, 0)
658 && !task->copy_ctors_done)
660 if (gomp_team_barrier_cancelled (&team->barrier))
662 do_cancel:
663 gomp_mutex_unlock (&team->task_lock);
664 gomp_finish_task (task);
665 free (task);
666 return;
668 if (taskgroup)
670 if (taskgroup->cancelled)
671 goto do_cancel;
672 if (taskgroup->workshare
673 && taskgroup->prev
674 && taskgroup->prev->cancelled)
675 goto do_cancel;
678 if (taskgroup)
679 taskgroup->num_children++;
680 if (depend_size)
682 gomp_task_handle_depend (task, parent, depend);
683 if (task->num_dependees)
685 /* Tasks that depend on other tasks are not put into the
686 various waiting queues, so we are done for now. Said
687 tasks are instead put into the queues via
688 gomp_task_run_post_handle_dependers() after their
689 dependencies have been satisfied. After which, they
690 can be picked up by the various scheduling
691 points. */
692 gomp_mutex_unlock (&team->task_lock);
693 return;
695 /* Check for taskwait nowait depend which doesn't need to wait for
696 anything. */
697 if (__builtin_expect (fn == empty_task, 0))
699 if (taskgroup)
700 taskgroup->num_children--;
701 gomp_task_run_post_handle_depend_hash (task);
702 gomp_mutex_unlock (&team->task_lock);
703 gomp_finish_task (task);
704 free (task);
705 return;
/* Queue the task: at the front of the parent's children queue and of the
   taskgroup queue, and at the end of the team queue.  */
709 priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
710 task, priority,
711 PRIORITY_INSERT_BEGIN,
712 /*adjust_parent_depends_on=*/false,
713 task->parent_depends_on);
714 if (taskgroup)
715 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
716 task, priority,
717 PRIORITY_INSERT_BEGIN,
718 /*adjust_parent_depends_on=*/false,
719 task->parent_depends_on);
721 priority_queue_insert (PQ_TEAM, &team->task_queue,
722 task, priority,
723 PRIORITY_INSERT_END,
724 /*adjust_parent_depends_on=*/false,
725 task->parent_depends_on);
727 ++team->task_count;
728 ++team->task_queued_count;
729 gomp_team_barrier_set_task_pending (&team->barrier);
/* Decide about waking a thread while still holding task_lock; the wake
   itself happens after the unlock.  */
730 do_wake = team->task_running_count + !parent->in_tied_task
731 < team->nthreads;
732 gomp_mutex_unlock (&team->task_lock);
733 if (do_wake)
734 gomp_team_barrier_wake (&team->barrier, 1);
/* Internal aliases for entry points; GOMP_task is defined in this file.
   NOTE(review): the taskgroup functions are not visible in this part of
   the file - presumably defined further down.  */
738 ialias (GOMP_task)
739 ialias (GOMP_taskgroup_start)
740 ialias (GOMP_taskgroup_end)
741 ialias (GOMP_taskgroup_reduction_register)
/* First inclusion of taskloop.c, with TYPE defined as long and UTYPE as
   unsigned long.  The helper macros are undefined again afterwards.  */
743 #define TYPE long
744 #define UTYPE unsigned long
745 #define TYPE_is_long 1
746 #include "taskloop.c"
747 #undef TYPE
748 #undef UTYPE
749 #undef TYPE_is_long
/* Second inclusion of taskloop.c, with TYPE and UTYPE both unsigned long
   long; GOMP_taskloop is renamed to GOMP_taskloop_ull for this copy.  */
751 #define TYPE unsigned long long
752 #define UTYPE TYPE
753 #define GOMP_taskloop GOMP_taskloop_ull
754 #include "taskloop.c"
755 #undef TYPE
756 #undef UTYPE
757 #undef GOMP_taskloop
759 static void inline
760 priority_queue_move_task_first (enum priority_queue_type type,
761 struct priority_queue *head,
762 struct gomp_task *task)
764 #if _LIBGOMP_CHECKING_
765 if (!priority_queue_task_in_queue_p (type, head, task))
766 gomp_fatal ("Attempt to move first missing task %p", task);
767 #endif
768 struct priority_list *list;
769 if (priority_queue_multi_p (head))
771 list = priority_queue_lookup_priority (head, task->priority);
772 #if _LIBGOMP_CHECKING_
773 if (!list)
774 gomp_fatal ("Unable to find priority %d", task->priority);
775 #endif
777 else
778 list = &head->l;
779 priority_list_remove (list, task_to_priority_node (type, task), 0);
780 priority_list_insert (type, list, task, task->priority,
781 PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
782 task->parent_depends_on);
785 /* Actual body of GOMP_PLUGIN_target_task_completion that is executed
786 with team->task_lock held, or is executed in the thread that called
787 gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
788 run before it acquires team->task_lock. */
790 static void
791 gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
793 struct gomp_task *parent = task->parent;
794 if (parent)
795 priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
796 task);
798 struct gomp_taskgroup *taskgroup = task->taskgroup;
799 if (taskgroup)
800 priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
801 task);
803 priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
804 PRIORITY_INSERT_BEGIN, false,
805 task->parent_depends_on);
806 task->kind = GOMP_TASK_WAITING;
807 if (parent && parent->taskwait)
809 if (parent->taskwait->in_taskwait)
811 /* One more task has had its dependencies met.
812 Inform any waiters. */
813 parent->taskwait->in_taskwait = false;
814 gomp_sem_post (&parent->taskwait->taskwait_sem);
816 else if (parent->taskwait->in_depend_wait)
818 /* One more task has had its dependencies met.
819 Inform any waiters. */
820 parent->taskwait->in_depend_wait = false;
821 gomp_sem_post (&parent->taskwait->taskwait_sem);
824 if (taskgroup && taskgroup->in_taskgroup_wait)
826 /* One more task has had its dependencies met.
827 Inform any waiters. */
828 taskgroup->in_taskgroup_wait = false;
829 gomp_sem_post (&taskgroup->taskgroup_sem);
832 ++team->task_queued_count;
833 gomp_team_barrier_set_task_pending (&team->barrier);
834 /* I'm afraid this can't be done after releasing team->task_lock,
835 as gomp_target_task_completion is run from unrelated thread and
836 therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
837 the team could be gone already. */
838 if (team->nthreads > team->task_running_count)
839 gomp_team_barrier_wake (&team->barrier, 1);
842 /* Signal that a target task TTASK has completed the asynchronously
843 running phase and should be requeued as a task to handle the
844 variable unmapping. */
846 void
847 GOMP_PLUGIN_target_task_completion (void *data)
849 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
850 struct gomp_task *task = ttask->task;
851 struct gomp_team *team = ttask->team;
853 gomp_mutex_lock (&team->task_lock);
854 if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN)
856 ttask->state = GOMP_TARGET_TASK_FINISHED;
857 gomp_mutex_unlock (&team->task_lock);
858 return;
860 ttask->state = GOMP_TARGET_TASK_FINISHED;
861 gomp_target_task_completion (team, task);
862 gomp_mutex_unlock (&team->task_lock);
/* Reviewer summary: builds a task wrapping a struct gomp_target_task and
   either queues it, runs its mapping part at once, or discards it.
   Returns true on every path except one: when STATE is
   GOMP_TARGET_TASK_DATA and the task was not recorded as waiting on any
   earlier task, the task is freed and false is returned.  */
865 /* Called for nowait target tasks. */
867 bool
868 gomp_create_target_task (struct gomp_device_descr *devicep,
869 void (*fn) (void *), size_t mapnum, void **hostaddrs,
870 size_t *sizes, unsigned short *kinds,
871 unsigned int flags, void **depend, void **args,
872 enum gomp_target_task_state state)
874 struct gomp_thread *thr = gomp_thread ();
875 struct gomp_team *team = thr->ts.team;
877 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
878 if (__builtin_expect (gomp_cancel_var, 0) && team)
880 if (gomp_team_barrier_cancelled (&team->barrier))
881 return true;
882 if (thr->task->taskgroup)
884 if (thr->task->taskgroup->cancelled)
885 return true;
886 if (thr->task->taskgroup->workshare
887 && thr->task->taskgroup->prev
888 && thr->task->taskgroup->prev->cancelled)
889 return true;
893 struct gomp_target_task *ttask;
894 struct gomp_task *task;
895 struct gomp_task *parent = thr->task;
896 struct gomp_taskgroup *taskgroup = parent->taskgroup;
897 bool do_wake;
898 size_t depend_size = 0;
899 uintptr_t depend_cnt = 0;
900 size_t tgt_align = 0, tgt_size = 0;
901 uintptr_t args_cnt = 0;
/* The dependence count is depend[0], or depend[1] when depend[0] is
   zero.  */
903 if (depend != NULL)
905 depend_cnt = (uintptr_t) (depend[0] ? depend[0] : depend[1]);
906 depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
908 if (fn)
910 /* GOMP_MAP_FIRSTPRIVATE need to be copied first, as they are
911 firstprivate on the target task. */
912 size_t i;
/* Size the firstprivate copy area: each GOMP_MAP_FIRSTPRIVATE entry is
   placed at an offset aligned to 1 << (kinds[i] >> 8), and tgt_align
   tracks the largest such alignment.  */
913 for (i = 0; i < mapnum; i++)
914 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
916 size_t align = (size_t) 1 << (kinds[i] >> 8);
917 if (tgt_align < align)
918 tgt_align = align;
919 tgt_size = (tgt_size + align - 1) & ~(align - 1);
920 tgt_size += sizes[i];
/* Reserve slack so the start of the copy area can be aligned later.  */
922 if (tgt_align)
923 tgt_size += tgt_align - 1;
924 else
925 tgt_size = 0;
926 if (args)
928 void **cargs = args;
929 while (*cargs)
931 intptr_t id = (intptr_t) *cargs++;
932 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
933 cargs++;
/* args_cnt counts the terminating NULL entry as well.  */
935 args_cnt = cargs + 1 - args;
/* Single allocation: task, depend entries, target task with its hostaddrs
   array, copied args, sizes, kinds, then the firstprivate copies.  */
939 task = gomp_malloc (sizeof (*task) + depend_size
940 + sizeof (*ttask)
941 + args_cnt * sizeof (void *)
942 + mapnum * (sizeof (void *) + sizeof (size_t)
943 + sizeof (unsigned short))
944 + tgt_size);
945 gomp_init_task (task, parent, gomp_icv (false));
946 task->priority = 0;
947 task->kind = GOMP_TASK_WAITING;
948 task->in_tied_task = parent->in_tied_task;
949 task->taskgroup = taskgroup;
/* The target task descriptor sits directly after the depend entries.  */
950 ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
951 ttask->devicep = devicep;
952 ttask->fn = fn;
953 ttask->mapnum = mapnum;
954 memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
955 if (args_cnt)
957 ttask->args = (void **) &ttask->hostaddrs[mapnum];
958 memcpy (ttask->args, args, args_cnt * sizeof (void *));
959 ttask->sizes = (size_t *) &ttask->args[args_cnt];
961 else
963 ttask->args = args;
964 ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
966 memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
967 ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
968 memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
969 if (tgt_align)
/* Align the start of the copy area, then copy each firstprivate object
   and point the task's hostaddrs entry at the copy.  */
971 char *tgt = (char *) &ttask->kinds[mapnum];
972 size_t i;
973 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
974 if (al)
975 tgt += tgt_align - al;
976 tgt_size = 0;
977 for (i = 0; i < mapnum; i++)
978 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
980 size_t align = (size_t) 1 << (kinds[i] >> 8);
981 tgt_size = (tgt_size + align - 1) & ~(align - 1);
982 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
983 ttask->hostaddrs[i] = tgt + tgt_size;
984 tgt_size = tgt_size + sizes[i];
987 ttask->flags = flags;
988 ttask->state = state;
989 ttask->task = task;
990 ttask->team = team;
/* task->fn stays NULL; the target task descriptor travels in fn_data.  */
991 task->fn = NULL;
992 task->fn_data = ttask;
993 task->final_task = 0;
994 gomp_mutex_lock (&team->task_lock);
995 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
996 if (__builtin_expect (gomp_cancel_var, 0))
998 if (gomp_team_barrier_cancelled (&team->barrier))
1000 do_cancel:
1001 gomp_mutex_unlock (&team->task_lock);
1002 gomp_finish_task (task);
1003 free (task);
1004 return true;
1006 if (taskgroup)
1008 if (taskgroup->cancelled)
1009 goto do_cancel;
1010 if (taskgroup->workshare
1011 && taskgroup->prev
1012 && taskgroup->prev->cancelled)
1013 goto do_cancel;
1016 if (depend_size)
1018 gomp_task_handle_depend (task, parent, depend);
/* A task with pending dependees is not queued here; it is only counted
   in its taskgroup.  */
1019 if (task->num_dependees)
1021 if (taskgroup)
1022 taskgroup->num_children++;
1023 gomp_mutex_unlock (&team->task_lock);
1024 return true;
/* GOMP_TARGET_TASK_DATA with nothing to wait for: undo the dependence
   bookkeeping, free the task and return false.  */
1027 if (state == GOMP_TARGET_TASK_DATA)
1029 gomp_task_run_post_handle_depend_hash (task);
1030 gomp_mutex_unlock (&team->task_lock);
1031 gomp_finish_task (task);
1032 free (task);
1033 return false;
1035 if (taskgroup)
1036 taskgroup->num_children++;
1037 /* For async offloading, if we don't need to wait for dependencies,
1038 run the gomp_target_task_fn right away, essentially schedule the
1039 mapping part of the task in the current thread. */
1040 if (devicep != NULL
1041 && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1043 priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
1044 PRIORITY_INSERT_END,
1045 /*adjust_parent_depends_on=*/false,
1046 task->parent_depends_on);
1047 if (taskgroup)
1048 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1049 task, 0, PRIORITY_INSERT_END,
1050 /*adjust_parent_depends_on=*/false,
1051 task->parent_depends_on);
/* The task is not inserted into the team queue on this path, so its
   PQ_TEAM links are cleared explicitly.  */
1052 task->pnode[PQ_TEAM].next = NULL;
1053 task->pnode[PQ_TEAM].prev = NULL;
1054 task->kind = GOMP_TASK_TIED;
1055 ++team->task_count;
1056 gomp_mutex_unlock (&team->task_lock);
/* gomp_target_task_fn runs without task_lock held, with the new task as
   the thread's current task.  */
1058 thr->task = task;
1059 gomp_target_task_fn (task->fn_data);
1060 thr->task = parent;
1062 gomp_mutex_lock (&team->task_lock);
1063 task->kind = GOMP_TASK_ASYNC_RUNNING;
1064 /* If GOMP_PLUGIN_target_task_completion has run already
1065 in between gomp_target_task_fn and the mutex lock,
1066 perform the requeuing here. */
1067 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1068 gomp_target_task_completion (team, task);
1069 else
1070 ttask->state = GOMP_TARGET_TASK_RUNNING;
1071 gomp_mutex_unlock (&team->task_lock);
1072 return true;
/* Otherwise queue the task: front of the parent's children queue and of
   the taskgroup queue, end of the team queue.  */
1074 priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
1075 PRIORITY_INSERT_BEGIN,
1076 /*adjust_parent_depends_on=*/false,
1077 task->parent_depends_on);
1078 if (taskgroup)
1079 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
1080 PRIORITY_INSERT_BEGIN,
1081 /*adjust_parent_depends_on=*/false,
1082 task->parent_depends_on);
1083 priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
1084 PRIORITY_INSERT_END,
1085 /*adjust_parent_depends_on=*/false,
1086 task->parent_depends_on);
1087 ++team->task_count;
1088 ++team->task_queued_count;
1089 gomp_team_barrier_set_task_pending (&team->barrier);
/* The wake decision is taken under task_lock; the wake itself follows the
   unlock.  */
1090 do_wake = team->task_running_count + !parent->in_tied_task
1091 < team->nthreads;
1092 gomp_mutex_unlock (&team->task_lock);
1093 if (do_wake)
1094 gomp_team_barrier_wake (&team->barrier, 1);
1095 return true;
1098 /* Given a parent_depends_on task in LIST, move it to the front of its
1099 priority so it is run as soon as possible.
1101 Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.
1103 We rearrange the queue such that all parent_depends_on tasks are
1104 first, and last_parent_depends_on points to the last such task we
1105 rearranged. For example, given the following tasks in a queue
1106 where PD[123] are the parent_depends_on tasks:
1108 task->children
1111 C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4
1113 We rearrange such that:
1115 task->children
1116 | +--- last_parent_depends_on
1119 PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4. */
1121 static void inline
1122 priority_list_upgrade_task (struct priority_list *list,
1123 struct priority_node *node)
1125 struct priority_node *last_parent_depends_on
1126 = list->last_parent_depends_on;
1127 if (last_parent_depends_on)
1129 node->prev->next = node->next;
1130 node->next->prev = node->prev;
1131 node->prev = last_parent_depends_on;
1132 node->next = last_parent_depends_on->next;
1133 node->prev->next = node;
1134 node->next->prev = node;
1136 else if (node != list->tasks)
1138 node->prev->next = node->next;
1139 node->next->prev = node->prev;
1140 node->prev = list->tasks->prev;
1141 node->next = list->tasks;
1142 list->tasks = node;
1143 node->prev->next = node;
1144 node->next->prev = node;
1146 list->last_parent_depends_on = node;
1149 /* Given a parent_depends_on TASK in its parent's children_queue, move
1150 it to the front of its priority so it is run as soon as possible.
1152 PARENT is passed as an optimization.
1154 (This function could be defined in priority_queue.c, but we want it
1155 inlined, and putting it in priority_queue.h is not an option, given
1156 that gomp_task has not been properly defined at that point). */
1158 static void inline
1159 priority_queue_upgrade_task (struct gomp_task *task,
1160 struct gomp_task *parent)
1162 struct priority_queue *head = &parent->children_queue;
1163 struct priority_node *node = &task->pnode[PQ_CHILDREN];
1164 #if _LIBGOMP_CHECKING_
1165 if (!task->parent_depends_on)
1166 gomp_fatal ("priority_queue_upgrade_task: task must be a "
1167 "parent_depends_on task");
1168 if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
1169 gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
1170 #endif
1171 if (priority_queue_multi_p (head))
1173 struct priority_list *list
1174 = priority_queue_lookup_priority (head, task->priority);
1175 priority_list_upgrade_task (list, node);
1177 else
1178 priority_list_upgrade_task (&head->l, node);
1181 /* Given a CHILD_TASK in LIST that is about to be executed, move it out of
1182 the way in LIST so that other tasks can be considered for
1183 execution. LIST contains tasks of type TYPE.
1185 Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
1186 if applicable. */
1188 static void inline
1189 priority_list_downgrade_task (enum priority_queue_type type,
1190 struct priority_list *list,
1191 struct gomp_task *child_task)
1193 struct priority_node *node = task_to_priority_node (type, child_task);
1194 if (list->tasks == node)
1195 list->tasks = node->next;
1196 else if (node->next != list->tasks)
1198 /* The task in NODE is about to become TIED and TIED tasks
1199 cannot come before WAITING tasks. If we're about to
1200 leave the queue in such an indeterminate state, rewire
1201 things appropriately. However, a TIED task at the end is
1202 perfectly fine. */
1203 struct gomp_task *next_task = priority_node_to_task (type, node->next);
1204 if (next_task->kind == GOMP_TASK_WAITING)
1206 /* Remove from list. */
1207 node->prev->next = node->next;
1208 node->next->prev = node->prev;
1209 /* Rewire at the end. */
1210 node->next = list->tasks;
1211 node->prev = list->tasks->prev;
1212 list->tasks->prev->next = node;
1213 list->tasks->prev = node;
1217 /* If the current task is the last_parent_depends_on for its
1218 priority, adjust last_parent_depends_on appropriately. */
1219 if (__builtin_expect (child_task->parent_depends_on, 0)
1220 && list->last_parent_depends_on == node)
1222 struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
1223 if (node->prev != node
1224 && prev_child->kind == GOMP_TASK_WAITING
1225 && prev_child->parent_depends_on)
1226 list->last_parent_depends_on = node->prev;
1227 else
1229 /* There are no more parent_depends_on entries waiting
1230 to run, clear the list. */
1231 list->last_parent_depends_on = NULL;
1236 /* Given a TASK in HEAD that is about to be executed, move it out of
1237 the way so that other tasks can be considered for execution. HEAD
1238 contains tasks of type TYPE.
1240 Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
1241 if applicable.
1243 (This function could be defined in priority_queue.c, but we want it
1244 inlined, and putting it in priority_queue.h is not an option, given
1245 that gomp_task has not been properly defined at that point). */
1247 static void inline
1248 priority_queue_downgrade_task (enum priority_queue_type type,
1249 struct priority_queue *head,
1250 struct gomp_task *task)
1252 #if _LIBGOMP_CHECKING_
1253 if (!priority_queue_task_in_queue_p (type, head, task))
1254 gomp_fatal ("Attempt to downgrade missing task %p", task);
1255 #endif
1256 if (priority_queue_multi_p (head))
1258 struct priority_list *list
1259 = priority_queue_lookup_priority (head, task->priority);
1260 priority_list_downgrade_task (type, list, task);
1262 else
1263 priority_list_downgrade_task (type, &head->l, task);
1266 /* Setup CHILD_TASK to execute. This is done by setting the task to
1267 TIED, and updating all relevant queues so that CHILD_TASK is no
1268 longer chosen for scheduling. Also, remove CHILD_TASK from the
1269 overall team task queue entirely.
1271 Return TRUE if task or its containing taskgroup has been
1272 cancelled. */
1274 static inline bool
1275 gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
1276 struct gomp_team *team)
1278 #if _LIBGOMP_CHECKING_
1279 if (child_task->parent)
1280 priority_queue_verify (PQ_CHILDREN,
1281 &child_task->parent->children_queue, true);
1282 if (child_task->taskgroup)
1283 priority_queue_verify (PQ_TASKGROUP,
1284 &child_task->taskgroup->taskgroup_queue, false);
1285 priority_queue_verify (PQ_TEAM, &team->task_queue, false);
1286 #endif
1288 /* Task is about to go tied, move it out of the way. */
1289 if (parent)
1290 priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
1291 child_task);
1293 /* Task is about to go tied, move it out of the way. */
1294 struct gomp_taskgroup *taskgroup = child_task->taskgroup;
1295 if (taskgroup)
1296 priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1297 child_task);
1299 priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
1300 MEMMODEL_RELAXED);
1301 child_task->pnode[PQ_TEAM].next = NULL;
1302 child_task->pnode[PQ_TEAM].prev = NULL;
1303 child_task->kind = GOMP_TASK_TIED;
1305 if (--team->task_queued_count == 0)
1306 gomp_team_barrier_clear_task_pending (&team->barrier);
1307 if (__builtin_expect (gomp_cancel_var, 0)
1308 && !child_task->copy_ctors_done)
1310 if (gomp_team_barrier_cancelled (&team->barrier))
1311 return true;
1312 if (taskgroup)
1314 if (taskgroup->cancelled)
1315 return true;
1316 if (taskgroup->workshare
1317 && taskgroup->prev
1318 && taskgroup->prev->cancelled)
1319 return true;
1322 return false;
1325 static void
1326 gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
1328 struct gomp_task *parent = child_task->parent;
1329 size_t i;
1331 if (parent->depend_all_memory == child_task)
1332 parent->depend_all_memory = NULL;
1333 for (i = 0; i < child_task->depend_count; i++)
1334 if (!child_task->depend[i].redundant)
1336 if (child_task->depend[i].next)
1337 child_task->depend[i].next->prev = child_task->depend[i].prev;
1338 if (child_task->depend[i].prev)
1339 child_task->depend[i].prev->next = child_task->depend[i].next;
1340 else
1342 hash_entry_type *slot
1343 = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
1344 NO_INSERT);
1345 if (*slot != &child_task->depend[i])
1346 abort ();
1347 if (child_task->depend[i].next)
1348 *slot = child_task->depend[i].next;
1349 else
1350 htab_clear_slot (parent->depend_hash, slot);
1355 /* After a CHILD_TASK has been run, adjust the dependency queue for
1356 each task that depends on CHILD_TASK, to record the fact that there
1357 is one less dependency to worry about. If a task that depended on
1358 CHILD_TASK now has no dependencies, place it in the various queues
1359 so it gets scheduled to run.
1361 TEAM is the team to which CHILD_TASK belongs to. */
1363 static size_t
1364 gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
1365 struct gomp_team *team)
1367 struct gomp_task *parent = child_task->parent;
1368 size_t i, count = child_task->dependers->n_elem, ret = 0;
1369 for (i = 0; i < count; i++)
1371 struct gomp_task *task = child_task->dependers->elem[i];
1373 /* CHILD_TASK satisfies a dependency for TASK. Keep track of
1374 TASK's remaining dependencies. Once TASK has no other
1375 dependencies, put it into the various queues so it will get
1376 scheduled for execution. */
1377 if (--task->num_dependees != 0)
1378 continue;
1380 struct gomp_taskgroup *taskgroup = task->taskgroup;
1381 if (__builtin_expect (task->fn == empty_task, 0))
1383 if (!parent)
1384 task->parent = NULL;
1385 else if (__builtin_expect (task->parent_depends_on, 0)
1386 && --parent->taskwait->n_depend == 0
1387 && parent->taskwait->in_depend_wait)
1389 parent->taskwait->in_depend_wait = false;
1390 gomp_sem_post (&parent->taskwait->taskwait_sem);
1392 if (gomp_task_run_post_handle_depend (task, team))
1393 ++ret;
1394 if (taskgroup)
1396 if (taskgroup->num_children > 1)
1397 --taskgroup->num_children;
1398 else
1400 __atomic_store_n (&taskgroup->num_children, 0,
1401 MEMMODEL_RELEASE);
1402 if (taskgroup->in_taskgroup_wait)
1404 taskgroup->in_taskgroup_wait = false;
1405 gomp_sem_post (&taskgroup->taskgroup_sem);
1409 gomp_finish_task (task);
1410 free (task);
1411 continue;
1413 if (parent)
1415 priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
1416 task, task->priority,
1417 PRIORITY_INSERT_BEGIN,
1418 /*adjust_parent_depends_on=*/true,
1419 task->parent_depends_on);
1420 if (parent->taskwait)
1422 if (parent->taskwait->in_taskwait)
1424 /* One more task has had its dependencies met.
1425 Inform any waiters. */
1426 parent->taskwait->in_taskwait = false;
1427 gomp_sem_post (&parent->taskwait->taskwait_sem);
1429 else if (parent->taskwait->in_depend_wait)
1431 /* One more task has had its dependencies met.
1432 Inform any waiters. */
1433 parent->taskwait->in_depend_wait = false;
1434 gomp_sem_post (&parent->taskwait->taskwait_sem);
1438 else
1439 task->parent = NULL;
1440 if (taskgroup)
1442 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1443 task, task->priority,
1444 PRIORITY_INSERT_BEGIN,
1445 /*adjust_parent_depends_on=*/false,
1446 task->parent_depends_on);
1447 if (taskgroup->in_taskgroup_wait)
1449 /* One more task has had its dependencies met.
1450 Inform any waiters. */
1451 taskgroup->in_taskgroup_wait = false;
1452 gomp_sem_post (&taskgroup->taskgroup_sem);
1455 priority_queue_insert (PQ_TEAM, &team->task_queue,
1456 task, task->priority,
1457 PRIORITY_INSERT_END,
1458 /*adjust_parent_depends_on=*/false,
1459 task->parent_depends_on);
1460 ++team->task_count;
1461 ++team->task_queued_count;
1462 ++ret;
1464 free (child_task->dependers);
1465 child_task->dependers = NULL;
1466 if (ret > 1)
1467 gomp_team_barrier_set_task_pending (&team->barrier);
1468 return ret;
1471 static inline size_t
1472 gomp_task_run_post_handle_depend (struct gomp_task *child_task,
1473 struct gomp_team *team)
1475 if (child_task->depend_count == 0)
1476 return 0;
1478 /* If parent is gone already, the hash table is freed and nothing
1479 will use the hash table anymore, no need to remove anything from it. */
1480 if (child_task->parent != NULL)
1481 gomp_task_run_post_handle_depend_hash (child_task);
1483 if (child_task->dependers == NULL)
1484 return 0;
1486 return gomp_task_run_post_handle_dependers (child_task, team);
1489 /* Remove CHILD_TASK from its parent. */
1491 static inline void
1492 gomp_task_run_post_remove_parent (struct gomp_task *child_task)
1494 struct gomp_task *parent = child_task->parent;
1495 if (parent == NULL)
1496 return;
1498 /* If this was the last task the parent was depending on,
1499 synchronize with gomp_task_maybe_wait_for_dependencies so it can
1500 clean up and return. */
1501 if (__builtin_expect (child_task->parent_depends_on, 0)
1502 && --parent->taskwait->n_depend == 0
1503 && parent->taskwait->in_depend_wait)
1505 parent->taskwait->in_depend_wait = false;
1506 gomp_sem_post (&parent->taskwait->taskwait_sem);
1509 if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
1510 child_task, MEMMODEL_RELEASE)
1511 && parent->taskwait && parent->taskwait->in_taskwait)
1513 parent->taskwait->in_taskwait = false;
1514 gomp_sem_post (&parent->taskwait->taskwait_sem);
1516 child_task->pnode[PQ_CHILDREN].next = NULL;
1517 child_task->pnode[PQ_CHILDREN].prev = NULL;
1520 /* Remove CHILD_TASK from its taskgroup. */
1522 static inline void
1523 gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
1525 struct gomp_taskgroup *taskgroup = child_task->taskgroup;
1526 if (taskgroup == NULL)
1527 return;
1528 bool empty = priority_queue_remove (PQ_TASKGROUP,
1529 &taskgroup->taskgroup_queue,
1530 child_task, MEMMODEL_RELAXED);
1531 child_task->pnode[PQ_TASKGROUP].next = NULL;
1532 child_task->pnode[PQ_TASKGROUP].prev = NULL;
1533 if (taskgroup->num_children > 1)
1534 --taskgroup->num_children;
1535 else
1537 /* We access taskgroup->num_children in GOMP_taskgroup_end
1538 outside of the task lock mutex region, so
1539 need a release barrier here to ensure memory
1540 written by child_task->fn above is flushed
1541 before the NULL is written. */
1542 __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
1544 if (empty && taskgroup->in_taskgroup_wait)
1546 taskgroup->in_taskgroup_wait = false;
1547 gomp_sem_post (&taskgroup->taskgroup_sem);
1551 void
1552 gomp_barrier_handle_tasks (gomp_barrier_state_t state)
1554 struct gomp_thread *thr = gomp_thread ();
1555 struct gomp_team *team = thr->ts.team;
1556 struct gomp_task *task = thr->task;
1557 struct gomp_task *child_task = NULL;
1558 struct gomp_task *to_free = NULL;
1559 int do_wake = 0;
1561 gomp_mutex_lock (&team->task_lock);
1562 if (gomp_barrier_last_thread (state))
1564 if (team->task_count == 0)
1566 gomp_team_barrier_done (&team->barrier, state);
1567 gomp_mutex_unlock (&team->task_lock);
1568 gomp_team_barrier_wake (&team->barrier, 0);
1569 return;
1571 gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
1574 while (1)
1576 bool cancelled = false;
1578 if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
1580 bool ignored;
1581 child_task
1582 = priority_queue_next_task (PQ_TEAM, &team->task_queue,
1583 PQ_IGNORED, NULL,
1584 &ignored);
1585 cancelled = gomp_task_run_pre (child_task, child_task->parent,
1586 team);
1587 if (__builtin_expect (cancelled, 0))
1589 if (to_free)
1591 gomp_finish_task (to_free);
1592 free (to_free);
1593 to_free = NULL;
1595 goto finish_cancelled;
1597 team->task_running_count++;
1598 child_task->in_tied_task = true;
1600 else if (team->task_count == 0
1601 && gomp_team_barrier_waiting_for_tasks (&team->barrier))
1603 gomp_team_barrier_done (&team->barrier, state);
1604 gomp_mutex_unlock (&team->task_lock);
1605 gomp_team_barrier_wake (&team->barrier, 0);
1606 if (to_free)
1608 gomp_finish_task (to_free);
1609 free (to_free);
1611 return;
1613 gomp_mutex_unlock (&team->task_lock);
1614 if (do_wake)
1616 gomp_team_barrier_wake (&team->barrier, do_wake);
1617 do_wake = 0;
1619 if (to_free)
1621 gomp_finish_task (to_free);
1622 free (to_free);
1623 to_free = NULL;
1625 if (child_task)
1627 thr->task = child_task;
1628 if (__builtin_expect (child_task->fn == NULL, 0))
1630 if (gomp_target_task_fn (child_task->fn_data))
1632 thr->task = task;
1633 gomp_mutex_lock (&team->task_lock);
1634 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1635 team->task_running_count--;
1636 struct gomp_target_task *ttask
1637 = (struct gomp_target_task *) child_task->fn_data;
1638 /* If GOMP_PLUGIN_target_task_completion has run already
1639 in between gomp_target_task_fn and the mutex lock,
1640 perform the requeuing here. */
1641 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1642 gomp_target_task_completion (team, child_task);
1643 else
1644 ttask->state = GOMP_TARGET_TASK_RUNNING;
1645 child_task = NULL;
1646 continue;
1649 else
1650 child_task->fn (child_task->fn_data);
1651 thr->task = task;
1653 else
1654 return;
1655 gomp_mutex_lock (&team->task_lock);
1656 if (child_task)
1658 if (child_task->detach_team)
1660 assert (child_task->detach_team == team);
1661 child_task->kind = GOMP_TASK_DETACHED;
1662 ++team->task_detach_count;
1663 --team->task_running_count;
1664 gomp_debug (0,
1665 "thread %d: task with event %p finished without "
1666 "completion event fulfilled in team barrier\n",
1667 thr->ts.team_id, child_task);
1668 child_task = NULL;
1669 continue;
1672 finish_cancelled:;
1673 size_t new_tasks
1674 = gomp_task_run_post_handle_depend (child_task, team);
1675 gomp_task_run_post_remove_parent (child_task);
1676 gomp_clear_parent (&child_task->children_queue);
1677 gomp_task_run_post_remove_taskgroup (child_task);
1678 to_free = child_task;
1679 if (!cancelled)
1680 team->task_running_count--;
1681 child_task = NULL;
1682 if (new_tasks > 1)
1684 do_wake = team->nthreads - team->task_running_count;
1685 if (do_wake > new_tasks)
1686 do_wake = new_tasks;
1688 --team->task_count;
1693 /* Called when encountering a taskwait directive.
1695 Wait for all children of the current task. */
1697 void
1698 GOMP_taskwait (void)
1700 struct gomp_thread *thr = gomp_thread ();
1701 struct gomp_team *team = thr->ts.team;
1702 struct gomp_task *task = thr->task;
1703 struct gomp_task *child_task = NULL;
1704 struct gomp_task *to_free = NULL;
1705 struct gomp_taskwait taskwait;
1706 int do_wake = 0;
1708 /* The acquire barrier on load of task->children here synchronizes
1709 with the write of a NULL in gomp_task_run_post_remove_parent. It is
1710 not necessary that we synchronize with other non-NULL writes at
1711 this point, but we must ensure that all writes to memory by a
1712 child thread task work function are seen before we exit from
1713 GOMP_taskwait. */
1714 if (task == NULL
1715 || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
1716 return;
1718 memset (&taskwait, 0, sizeof (taskwait));
1719 bool child_q = false;
1720 gomp_mutex_lock (&team->task_lock);
1721 while (1)
1723 bool cancelled = false;
1724 if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
1726 bool destroy_taskwait = task->taskwait != NULL;
1727 task->taskwait = NULL;
1728 gomp_mutex_unlock (&team->task_lock);
1729 if (to_free)
1731 gomp_finish_task (to_free);
1732 free (to_free);
1734 if (destroy_taskwait)
1735 gomp_sem_destroy (&taskwait.taskwait_sem);
1736 return;
1738 struct gomp_task *next_task
1739 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1740 PQ_TEAM, &team->task_queue, &child_q);
1741 if (next_task->kind == GOMP_TASK_WAITING)
1743 child_task = next_task;
1744 cancelled
1745 = gomp_task_run_pre (child_task, task, team);
1746 if (__builtin_expect (cancelled, 0))
1748 if (to_free)
1750 gomp_finish_task (to_free);
1751 free (to_free);
1752 to_free = NULL;
1754 goto finish_cancelled;
1757 else
1759 /* All tasks we are waiting for are either running in other
1760 threads, are detached and waiting for the completion event to be
1761 fulfilled, or they are tasks that have not had their
1762 dependencies met (so they're not even in the queue). Wait
1763 for them. */
1764 if (task->taskwait == NULL)
1766 taskwait.in_depend_wait = false;
1767 gomp_sem_init (&taskwait.taskwait_sem, 0);
1768 task->taskwait = &taskwait;
1770 taskwait.in_taskwait = true;
1772 gomp_mutex_unlock (&team->task_lock);
1773 if (do_wake)
1775 gomp_team_barrier_wake (&team->barrier, do_wake);
1776 do_wake = 0;
1778 if (to_free)
1780 gomp_finish_task (to_free);
1781 free (to_free);
1782 to_free = NULL;
1784 if (child_task)
1786 thr->task = child_task;
1787 if (__builtin_expect (child_task->fn == NULL, 0))
1789 if (gomp_target_task_fn (child_task->fn_data))
1791 thr->task = task;
1792 gomp_mutex_lock (&team->task_lock);
1793 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1794 struct gomp_target_task *ttask
1795 = (struct gomp_target_task *) child_task->fn_data;
1796 /* If GOMP_PLUGIN_target_task_completion has run already
1797 in between gomp_target_task_fn and the mutex lock,
1798 perform the requeuing here. */
1799 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1800 gomp_target_task_completion (team, child_task);
1801 else
1802 ttask->state = GOMP_TARGET_TASK_RUNNING;
1803 child_task = NULL;
1804 continue;
1807 else
1808 child_task->fn (child_task->fn_data);
1809 thr->task = task;
1811 else
1812 gomp_sem_wait (&taskwait.taskwait_sem);
1813 gomp_mutex_lock (&team->task_lock);
1814 if (child_task)
1816 if (child_task->detach_team)
1818 assert (child_task->detach_team == team);
1819 child_task->kind = GOMP_TASK_DETACHED;
1820 ++team->task_detach_count;
1821 gomp_debug (0,
1822 "thread %d: task with event %p finished without "
1823 "completion event fulfilled in taskwait\n",
1824 thr->ts.team_id, child_task);
1825 child_task = NULL;
1826 continue;
1829 finish_cancelled:;
1830 size_t new_tasks
1831 = gomp_task_run_post_handle_depend (child_task, team);
1833 if (child_q)
1835 priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1836 child_task, MEMMODEL_RELAXED);
1837 child_task->pnode[PQ_CHILDREN].next = NULL;
1838 child_task->pnode[PQ_CHILDREN].prev = NULL;
1841 gomp_clear_parent (&child_task->children_queue);
1843 gomp_task_run_post_remove_taskgroup (child_task);
1845 to_free = child_task;
1846 child_task = NULL;
1847 team->task_count--;
1848 if (new_tasks > 1)
1850 do_wake = team->nthreads - team->task_running_count
1851 - !task->in_tied_task;
1852 if (do_wake > new_tasks)
1853 do_wake = new_tasks;
1859 /* Called when encountering a taskwait directive with depend clause(s).
1860 Wait as if it was an mergeable included task construct with empty body. */
1862 void
1863 GOMP_taskwait_depend (void **depend)
1865 struct gomp_thread *thr = gomp_thread ();
1866 struct gomp_team *team = thr->ts.team;
1868 /* If parallel or taskgroup has been cancelled, return early. */
1869 if (__builtin_expect (gomp_cancel_var, 0) && team)
1871 if (gomp_team_barrier_cancelled (&team->barrier))
1872 return;
1873 if (thr->task->taskgroup)
1875 if (thr->task->taskgroup->cancelled)
1876 return;
1877 if (thr->task->taskgroup->workshare
1878 && thr->task->taskgroup->prev
1879 && thr->task->taskgroup->prev->cancelled)
1880 return;
1884 if (thr->task && thr->task->depend_hash)
1885 gomp_task_maybe_wait_for_dependencies (depend);
1888 /* Called when encountering a taskwait directive with nowait and depend
1889 clause(s). Create a possibly deferred task construct with empty body. */
1891 void
1892 GOMP_taskwait_depend_nowait (void **depend)
1894 ialias_call (GOMP_task) (empty_task, "", NULL, 0, 1, true,
1895 GOMP_TASK_FLAG_DEPEND, depend, 0, NULL);
1898 /* An undeferred task is about to run. Wait for all tasks that this
1899 undeferred task depends on.
1901 This is done by first putting all known ready dependencies
1902 (dependencies that have their own dependencies met) at the top of
1903 the scheduling queues. Then we iterate through these imminently
1904 ready tasks (and possibly other high priority tasks), and run them.
1905 If we run out of ready dependencies to execute, we either wait for
1906 the remaining dependencies to finish, or wait for them to get
1907 scheduled so we can run them.
1909 DEPEND is as in GOMP_task. */
1911 void
1912 gomp_task_maybe_wait_for_dependencies (void **depend)
1914 struct gomp_thread *thr = gomp_thread ();
1915 struct gomp_task *task = thr->task;
1916 struct gomp_team *team = thr->ts.team;
1917 struct gomp_task_depend_entry elem, *ent = NULL;
1918 struct gomp_taskwait taskwait;
1919 size_t orig_ndepend = (uintptr_t) depend[0];
1920 size_t nout = (uintptr_t) depend[1];
1921 size_t ndepend = orig_ndepend;
1922 size_t normal = ndepend;
1923 size_t n = 2;
1924 size_t i;
1925 size_t num_awaited = 0;
1926 struct gomp_task *child_task = NULL;
1927 struct gomp_task *to_free = NULL;
1928 int do_wake = 0;
1930 if (ndepend == 0)
1932 ndepend = nout;
1933 nout = (uintptr_t) depend[2] + (uintptr_t) depend[3];
1934 normal = nout + (uintptr_t) depend[4];
1935 n = 5;
1937 gomp_mutex_lock (&team->task_lock);
1938 if (__builtin_expect (task->depend_all_memory && ndepend, false))
1940 struct gomp_task *tsk = task->depend_all_memory;
1941 if (!tsk->parent_depends_on)
1943 tsk->parent_depends_on = true;
1944 ++num_awaited;
1945 if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
1946 priority_queue_upgrade_task (tsk, task);
1949 for (i = 0; i < ndepend; i++)
1951 elem.addr = depend[i + n];
1952 elem.is_in = i >= nout;
1953 if (__builtin_expect (i >= normal, 0))
1955 void **d = (void **) elem.addr;
1956 switch ((uintptr_t) d[1])
1958 case GOMP_DEPEND_IN:
1959 break;
1960 case GOMP_DEPEND_OUT:
1961 case GOMP_DEPEND_INOUT:
1962 case GOMP_DEPEND_MUTEXINOUTSET:
1963 elem.is_in = 0;
1964 break;
1965 case GOMP_DEPEND_INOUTSET:
1966 elem.is_in = 2;
1967 break;
1968 default:
1969 gomp_fatal ("unknown omp_depend_t dependence type %d",
1970 (int) (uintptr_t) d[1]);
1972 elem.addr = d[0];
1974 if (__builtin_expect (elem.addr == NULL && !elem.is_in, false))
1976 size_t size = htab_size (task->depend_hash);
1977 if (htab_elements (task->depend_hash) * 8 < size && size > 32)
1978 htab_expand (task->depend_hash);
1980 /* depend(inout: omp_all_memory) - depend on all previous
1981 sibling tasks that do have dependencies. Inlined
1982 htab_traverse. */
1983 hash_entry_type *slot = &task->depend_hash->entries[0];
1984 hash_entry_type *end = slot + htab_size (task->depend_hash);
1985 for (; slot != end; ++slot)
1987 if (*slot == HTAB_EMPTY_ENTRY || *slot == HTAB_DELETED_ENTRY)
1988 continue;
1989 for (ent = *slot; ent; ent = ent->next)
1991 struct gomp_task *tsk = ent->task;
1992 if (!tsk->parent_depends_on)
1994 tsk->parent_depends_on = true;
1995 ++num_awaited;
1996 if (tsk->num_dependees == 0
1997 && tsk->kind == GOMP_TASK_WAITING)
1998 priority_queue_upgrade_task (tsk, task);
2002 break;
2004 ent = htab_find (task->depend_hash, &elem);
2005 for (; ent; ent = ent->next)
2006 if (elem.is_in && elem.is_in == ent->is_in)
2007 continue;
2008 else
2010 struct gomp_task *tsk = ent->task;
2011 if (!tsk->parent_depends_on)
2013 tsk->parent_depends_on = true;
2014 ++num_awaited;
2015 /* If dependency TSK itself has no dependencies and is
2016 ready to run, move it up front so that we run it as
2017 soon as possible. */
2018 if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
2019 priority_queue_upgrade_task (tsk, task);
2023 if (num_awaited == 0)
2025 gomp_mutex_unlock (&team->task_lock);
2026 return;
2029 memset (&taskwait, 0, sizeof (taskwait));
2030 taskwait.n_depend = num_awaited;
2031 gomp_sem_init (&taskwait.taskwait_sem, 0);
2032 task->taskwait = &taskwait;
2034 while (1)
2036 bool cancelled = false;
2037 if (taskwait.n_depend == 0)
2039 task->taskwait = NULL;
2040 gomp_mutex_unlock (&team->task_lock);
2041 if (to_free)
2043 gomp_finish_task (to_free);
2044 free (to_free);
2046 gomp_sem_destroy (&taskwait.taskwait_sem);
2047 return;
2050 /* Theoretically when we have multiple priorities, we should
2051 chose between the highest priority item in
2052 task->children_queue and team->task_queue here, so we should
2053 use priority_queue_next_task(). However, since we are
2054 running an undeferred task, perhaps that makes all tasks it
2055 depends on undeferred, thus a priority of INF? This would
2056 make it unnecessary to take anything into account here,
2057 but the dependencies.
2059 On the other hand, if we want to use priority_queue_next_task(),
2060 care should be taken to only use priority_queue_remove()
2061 below if the task was actually removed from the children
2062 queue. */
2063 bool ignored;
2064 struct gomp_task *next_task
2065 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
2066 PQ_IGNORED, NULL, &ignored);
2068 if (next_task->kind == GOMP_TASK_WAITING)
2070 child_task = next_task;
2071 cancelled
2072 = gomp_task_run_pre (child_task, task, team);
2073 if (__builtin_expect (cancelled, 0))
2075 if (to_free)
2077 gomp_finish_task (to_free);
2078 free (to_free);
2079 to_free = NULL;
2081 goto finish_cancelled;
2084 else
2085 /* All tasks we are waiting for are either running in other
2086 threads, or they are tasks that have not had their
2087 dependencies met (so they're not even in the queue). Wait
2088 for them. */
2089 taskwait.in_depend_wait = true;
2090 gomp_mutex_unlock (&team->task_lock);
2091 if (do_wake)
2093 gomp_team_barrier_wake (&team->barrier, do_wake);
2094 do_wake = 0;
2096 if (to_free)
2098 gomp_finish_task (to_free);
2099 free (to_free);
2100 to_free = NULL;
2102 if (child_task)
2104 thr->task = child_task;
2105 if (__builtin_expect (child_task->fn == NULL, 0))
2107 if (gomp_target_task_fn (child_task->fn_data))
2109 thr->task = task;
2110 gomp_mutex_lock (&team->task_lock);
2111 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
2112 struct gomp_target_task *ttask
2113 = (struct gomp_target_task *) child_task->fn_data;
2114 /* If GOMP_PLUGIN_target_task_completion has run already
2115 in between gomp_target_task_fn and the mutex lock,
2116 perform the requeuing here. */
2117 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2118 gomp_target_task_completion (team, child_task);
2119 else
2120 ttask->state = GOMP_TARGET_TASK_RUNNING;
2121 child_task = NULL;
2122 continue;
2125 else
2126 child_task->fn (child_task->fn_data);
2127 thr->task = task;
2129 else
2130 gomp_sem_wait (&taskwait.taskwait_sem);
2131 gomp_mutex_lock (&team->task_lock);
2132 if (child_task)
2134 finish_cancelled:;
2135 size_t new_tasks
2136 = gomp_task_run_post_handle_depend (child_task, team);
2137 if (child_task->parent_depends_on)
2138 --taskwait.n_depend;
2140 priority_queue_remove (PQ_CHILDREN, &task->children_queue,
2141 child_task, MEMMODEL_RELAXED);
2142 child_task->pnode[PQ_CHILDREN].next = NULL;
2143 child_task->pnode[PQ_CHILDREN].prev = NULL;
2145 gomp_clear_parent (&child_task->children_queue);
2146 gomp_task_run_post_remove_taskgroup (child_task);
2147 to_free = child_task;
2148 child_task = NULL;
2149 team->task_count--;
2150 if (new_tasks > 1)
2152 do_wake = team->nthreads - team->task_running_count
2153 - !task->in_tied_task;
2154 if (do_wake > new_tasks)
2155 do_wake = new_tasks;
2161 /* Called when encountering a taskyield directive. */
/* Takes no arguments and returns nothing.  The body contains no
   statements, so a call currently has no effect.  */
2163 void
2164 GOMP_taskyield (void)
2166 /* Nothing at the moment. */
2169 static inline struct gomp_taskgroup *
2170 gomp_taskgroup_init (struct gomp_taskgroup *prev)
2172 struct gomp_taskgroup *taskgroup
2173 = gomp_malloc (sizeof (struct gomp_taskgroup));
2174 taskgroup->prev = prev;
2175 priority_queue_init (&taskgroup->taskgroup_queue);
2176 taskgroup->reductions = prev ? prev->reductions : NULL;
2177 taskgroup->in_taskgroup_wait = false;
2178 taskgroup->cancelled = false;
2179 taskgroup->workshare = false;
2180 taskgroup->num_children = 0;
2181 gomp_sem_init (&taskgroup->taskgroup_sem, 0);
2182 return taskgroup;
2185 void
2186 GOMP_taskgroup_start (void)
2188 struct gomp_thread *thr = gomp_thread ();
2189 struct gomp_team *team = thr->ts.team;
2190 struct gomp_task *task = thr->task;
2192 /* If team is NULL, all tasks are executed as
2193 GOMP_TASK_UNDEFERRED tasks and thus all children tasks of
2194 taskgroup and their descendant tasks will be finished
2195 by the time GOMP_taskgroup_end is called. */
2196 if (team == NULL)
2197 return;
2198 task->taskgroup = gomp_taskgroup_init (task->taskgroup);
/* Close the innermost taskgroup of the current task.  Until the
   taskgroup's num_children count is zero, the calling thread either
   runs a waiting task picked from the taskgroup queue (or from the
   current task's children queue) or sleeps on the taskgroup semaphore.
   On exit the taskgroup is unlinked from the task, its semaphore is
   destroyed and its memory is freed.  */
2201 void
2202 GOMP_taskgroup_end (void)
2204 struct gomp_thread *thr = gomp_thread ();
2205 struct gomp_team *team = thr->ts.team;
2206 struct gomp_task *task = thr->task;
2207 struct gomp_taskgroup *taskgroup;
2208 struct gomp_task *child_task = NULL;
2209 struct gomp_task *to_free = NULL;
2210 int do_wake = 0;
/* Without a team there is nothing to do here; GOMP_taskgroup_start
   returns without creating a taskgroup in that case too.  */
2212 if (team == NULL)
2213 return;
2214 taskgroup = task->taskgroup;
2215 if (__builtin_expect (taskgroup == NULL, 0)
2216 && thr->ts.level == 0)
2218 /* This can happen if GOMP_taskgroup_start is called when
2219 thr->ts.team == NULL, but inside of the taskgroup there
2220 is #pragma omp target nowait that creates an implicit
2221 team with a single thread. In this case, we want to wait
2222 for all outstanding tasks in this team. */
2223 gomp_team_barrier_wait (&team->barrier);
2224 return;
2227 /* The acquire barrier on load of taskgroup->num_children here
2228 synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
2229 It is not necessary that we synchronize with other non-0 writes at
2230 this point, but we must ensure that all writes to memory by a
2231 child thread task work function are seen before we exit from
2232 GOMP_taskgroup_end. */
2233 if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
2234 goto finish;
2236 bool unused;
/* team->task_lock is held while the queues and counters are examined.
   It is released before a child task is run or the thread sleeps, and
   taken again afterwards.  */
2237 gomp_mutex_lock (&team->task_lock);
2238 while (1)
2240 bool cancelled = false;
/* Tasks queued on the taskgroup itself are preferred.  When that queue
   is empty but children are still outstanding, fall back to the current
   task's own children queue, or wait if that is empty as well.  */
2241 if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
2242 MEMMODEL_RELAXED))
2244 if (taskgroup->num_children)
2246 if (priority_queue_empty_p (&task->children_queue,
2247 MEMMODEL_RELAXED))
2248 goto do_wait;
2249 child_task
2250 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
2251 PQ_TEAM, &team->task_queue,
2252 &unused);
2254 else
/* No children left: release the lock, dispose of the last finished
   child if one is pending, and leave.  */
2256 gomp_mutex_unlock (&team->task_lock);
2257 if (to_free)
2259 gomp_finish_task (to_free);
2260 free (to_free);
2262 goto finish;
2265 else
2266 child_task
2267 = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
2268 PQ_TEAM, &team->task_queue, &unused);
/* Only a task still in the GOMP_TASK_WAITING state is started here.
   If gomp_task_run_pre reports it as cancelled, it is not run and
   control jumps straight to the post-run bookkeeping.  */
2269 if (child_task->kind == GOMP_TASK_WAITING)
2271 cancelled
2272 = gomp_task_run_pre (child_task, child_task->parent, team);
2273 if (__builtin_expect (cancelled, 0))
2275 if (to_free)
2277 gomp_finish_task (to_free);
2278 free (to_free);
2279 to_free = NULL;
2281 goto finish_cancelled;
2284 else
2286 child_task = NULL;
2287 do_wait:
2288 /* All tasks we are waiting for are either running in other
2289 threads, or they are tasks that have not had their
2290 dependencies met (so they're not even in the queue). Wait
2291 for them. */
2292 taskgroup->in_taskgroup_wait = true;
2294 gomp_mutex_unlock (&team->task_lock);
/* The wake-up and the disposal of the previously finished child are
   done here, after the lock has been released.  */
2295 if (do_wake)
2297 gomp_team_barrier_wake (&team->barrier, do_wake);
2298 do_wake = 0;
2300 if (to_free)
2302 gomp_finish_task (to_free);
2303 free (to_free);
2304 to_free = NULL;
2306 if (child_task)
/* Run the child with thr->task pointing at it, then restore the
   current task.  */
2308 thr->task = child_task;
/* A task without a function pointer is handed to gomp_target_task_fn
   instead of being called directly.  When that returns nonzero the
   task is marked GOMP_TASK_ASYNC_RUNNING and the loop restarts without
   the post-run bookkeeping below.  */
2309 if (__builtin_expect (child_task->fn == NULL, 0))
2311 if (gomp_target_task_fn (child_task->fn_data))
2313 thr->task = task;
2314 gomp_mutex_lock (&team->task_lock);
2315 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
2316 struct gomp_target_task *ttask
2317 = (struct gomp_target_task *) child_task->fn_data;
2318 /* If GOMP_PLUGIN_target_task_completion has run already
2319 in between gomp_target_task_fn and the mutex lock,
2320 perform the requeuing here. */
2321 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2322 gomp_target_task_completion (team, child_task);
2323 else
2324 ttask->state = GOMP_TARGET_TASK_RUNNING;
2325 child_task = NULL;
2326 continue;
2329 else
2330 child_task->fn (child_task->fn_data);
2331 thr->task = task;
2333 else
/* Nothing runnable: sleep on the taskgroup semaphore.  */
2334 gomp_sem_wait (&taskgroup->taskgroup_sem);
2335 gomp_mutex_lock (&team->task_lock);
2336 if (child_task)
/* A child that still has detach_team set is switched to
   GOMP_TASK_DETACHED and counted in team->task_detach_count.  It is
   neither removed from the queues nor freed at this point.  */
2338 if (child_task->detach_team)
2340 assert (child_task->detach_team == team);
2341 child_task->kind = GOMP_TASK_DETACHED;
2342 ++team->task_detach_count;
2343 gomp_debug (0,
2344 "thread %d: task with event %p finished without "
2345 "completion event fulfilled in taskgroup\n",
2346 thr->ts.team_id, child_task);
2347 child_task = NULL;
2348 continue;
/* Post-run bookkeeping, also the jump target for cancelled tasks.  The
   child is remembered in to_free so that it is finished and freed
   later, once the lock has been dropped.  */
2351 finish_cancelled:;
2352 size_t new_tasks
2353 = gomp_task_run_post_handle_depend (child_task, team);
2354 gomp_task_run_post_remove_parent (child_task);
2355 gomp_clear_parent (&child_task->children_queue);
2356 gomp_task_run_post_remove_taskgroup (child_task);
2357 to_free = child_task;
2358 child_task = NULL;
2359 team->task_count--;
/* When more than one new task was reported, request a wake-up of other
   threads, capped at new_tasks.  */
2360 if (new_tasks > 1)
2362 do_wake = team->nthreads - team->task_running_count
2363 - !task->in_tied_task;
2364 if (do_wake > new_tasks)
2365 do_wake = new_tasks;
/* Pop the taskgroup: restore the enclosing one and release this one.  */
2370 finish:
2371 task->taskgroup = taskgroup->prev;
2372 gomp_sem_destroy (&taskgroup->taskgroup_sem);
2373 free (taskgroup);
/* Prepare the chain of reduction descriptors starting at DATA (linked
   through element 4) and build the hash table used to look entries up.

   DATA     head of the new descriptor chain.
   OLD      chain that was registered before, or NULL; the last new
            descriptor is linked to it.
   ORIG     when non-NULL, a chain whose array pointer (element 2) and
            array end (element 6) are copied instead of allocating.
   NTHREADS number of per-thread chunks to allocate for each descriptor.  */
2376 static inline __attribute__((always_inline)) void
2377 gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig,
2378 unsigned nthreads)
2380 size_t total_cnt = 0;
2381 uintptr_t *d = data;
2382 struct htab *old_htab = NULL, *new_htab;
/* First pass over the new chain: give every descriptor its backing
   array, clear its table slot and add up the entry counts.  */
2385 if (__builtin_expect (orig != NULL, 0))
2387 /* For worksharing task reductions, memory has been allocated
2388 already by some other thread that encountered the construct
2389 earlier. */
2390 d[2] = orig[2];
2391 d[6] = orig[6];
2392 orig = (uintptr_t *) orig[4];
2394 else
/* d[1] bytes for each of the NTHREADS chunks, zero filled.  On input
   d[2] is passed as the first argument of gomp_aligned_alloc; afterwards
   it holds the array address and d[6] the address just past the array.  */
2396 size_t sz = d[1] * nthreads;
2397 /* Should use omp_alloc if d[3] is not -1. */
2398 void *ptr = gomp_aligned_alloc (d[2], sz);
2399 memset (ptr, '\0', sz);
2400 d[2] = (uintptr_t) ptr;
2401 d[6] = d[2] + sz;
2403 d[5] = 0;
2404 total_cnt += d[0];
/* A zero next pointer marks the last new descriptor; chain it to OLD
   and stop.  */
2405 if (d[4] == 0)
2407 d[4] = (uintptr_t) old;
2408 break;
2410 else
2411 d = (uintptr_t *) d[4];
2413 while (1);
/* If OLD already carries a hash table, the new table is sized to hold
   its elements as well.  */
2414 if (old && old[5])
2416 old_htab = (struct htab *) old[5];
2417 total_cnt += htab_elements (old_htab);
2419 new_htab = htab_create (total_cnt);
2420 if (old_htab)
2422 /* Copy old hash table, like in htab_expand. */
2423 hash_entry_type *p, *olimit;
2424 new_htab->n_elements = htab_elements (old_htab);
2425 olimit = old_htab->entries + old_htab->size;
2426 p = old_htab->entries;
2429 hash_entry_type x = *p;
2430 if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
2431 *find_empty_slot_for_expand (new_htab, htab_hash (x)) = x;
2432 p++;
2434 while (p < olimit);
/* Second pass over the new chain only (it stops at the descriptor whose
   next pointer is OLD): insert every three-word entry, which starts at
   index 7, and make its third word point back at its descriptor.  */
2436 d = data;
2439 size_t j;
2440 for (j = 0; j < d[0]; ++j)
2442 uintptr_t *p = d + 7 + j * 3;
2443 p[2] = (uintptr_t) d;
2444 /* Ugly hack, hash_entry_type is defined for the task dependencies,
2445 which hash on the first element which is a pointer. We need
2446 to hash also on the first sizeof (uintptr_t) bytes which contain
2447 a pointer. Hide the cast from the compiler. */
2448 hash_entry_type n;
2449 __asm ("" : "=g" (n) : "0" (p));
2450 *htab_find_slot (&new_htab, n, INSERT) = n;
2452 if (d[4] == (uintptr_t) old)
2453 break;
2454 else
2455 d = (uintptr_t *) d[4];
2457 while (1);
/* The table pointer is stored in the descriptor the loop above stopped
   on, i.e. the last new one.
   NOTE(review): GOMP_task_reduction_remap and
   GOMP_taskgroup_reduction_unregister read the table from element 5 of
   the chain head.  The two coincide when the new chain has a single
   descriptor -- confirm whether longer chains are ever passed in.  */
2458 d[5] = (uintptr_t) new_htab;
/* Create a team of one thread (gomp_new_team (1)) and install it as the
   calling thread's current team, saving the previous team state in
   team->prev_ts.  The team's first implicit task is initialised with
   the ICVs of the thread's current task, or with gomp_global_icv when
   the thread has no task.  It then replaces the nearest
   GOMP_TASK_IMPLICIT task found by walking the parent links from the
   current task.  */
2461 static void
2462 gomp_create_artificial_team (void)
2464 struct gomp_thread *thr = gomp_thread ();
2465 struct gomp_task_icv *icv;
2466 struct gomp_team *team = gomp_new_team (1);
2467 struct gomp_task *task = thr->task;
/* implicit_task points at the link that will be redirected: first at
   the local copy of thr->task, later possibly at some task's parent
   field.  */
2468 struct gomp_task **implicit_task = &task;
2469 icv = task ? &task->icv : &gomp_global_icv;
2470 team->prev_ts = thr->ts;
2471 thr->ts.team = team;
2472 thr->ts.team_id = 0;
2473 thr->ts.work_share = &team->work_shares[0];
2474 thr->ts.last_work_share = NULL;
2475 #ifdef HAVE_SYNC_BUILTINS
2476 thr->ts.single_count = 0;
2477 #endif
2478 thr->ts.static_trip = 0;
2479 thr->task = &team->implicit_task[0];
2480 gomp_init_task (thr->task, NULL, icv);
/* Walk up the parent links until an implicit task, or the end of the
   chain, is reached.  */
2481 while (*implicit_task
2482 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
2483 implicit_task = &(*implicit_task)->parent;
/* An old implicit task was found: make it current just long enough for
   gomp_end_task to run on it, free it, then switch back to the new
   implicit task.  */
2484 if (*implicit_task)
2486 thr->task = *implicit_task;
2487 gomp_end_task ();
2488 free (*implicit_task);
2489 thr->task = &team->implicit_task[0];
2491 #ifdef LIBGOMP_USE_PTHREADS
2492 else
2493 pthread_setspecific (gomp_thread_destructor, thr);
2494 #endif
/* If the link found lies inside the task chain rather than in the local
   copy, splice the new implicit task in there and keep the original
   current task as the thread's task.  */
2495 if (implicit_task != &task)
2497 *implicit_task = thr->task;
2498 thr->task = task;
2502 /* The format of data is:
2503 data[0] cnt
2504 data[1] size
2505 data[2] alignment (on output array pointer)
2506 data[3] allocator (-1 if malloc allocator)
2507 data[4] next pointer
2508 data[5] used internally (htab pointer)
2509 data[6] used internally (end of array)
2510 cnt times
2511 ent[0] address
2512 ent[1] offset
2513 ent[2] used internally (pointer to data[0])
2514 The entries are sorted by increasing offset, so that a binary
2515 search can be performed. Normally, data[8] is 0, exception is
2516 for worksharing construct task reductions in cancellable parallel,
2517 where at offset 0 there should be space for a pointer and an integer
2518 which are used internally. */
2520 void
2521 GOMP_taskgroup_reduction_register (uintptr_t *data)
2523 struct gomp_thread *thr = gomp_thread ();
2524 struct gomp_team *team = thr->ts.team;
2525 struct gomp_task *task;
2526 unsigned nthreads;
2527 if (__builtin_expect (team == NULL, 0))
2529 /* The task reduction code needs a team and task, so for
2530 orphaned taskgroups just create the implicit team. */
2531 gomp_create_artificial_team ();
2532 ialias_call (GOMP_taskgroup_start) ();
2533 team = thr->ts.team;
2535 nthreads = team->nthreads;
2536 task = thr->task;
2537 gomp_reduction_register (data, task->taskgroup->reductions, NULL, nthreads);
2538 task->taskgroup->reductions = data;
2541 void
2542 GOMP_taskgroup_reduction_unregister (uintptr_t *data)
2544 uintptr_t *d = data;
2545 htab_free ((struct htab *) data[5]);
2548 gomp_aligned_free ((void *) d[2]);
2549 d = (uintptr_t *) d[4];
2551 while (d && !d[5]);
2553 ialias (GOMP_taskgroup_reduction_unregister)
2555 /* For i = 0 to cnt-1, remap ptrs[i] which is either address of the
2556 original list item or address of previously remapped original list
2557 item to address of the private copy, store that to ptrs[i].
2558 For i < cntorig, additionally set ptrs[cnt+i] to the address of
2559 the original list item. */
2561 void
2562 GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs)
2564 struct gomp_thread *thr = gomp_thread ();
2565 struct gomp_task *task = thr->task;
2566 unsigned id = thr->ts.team_id;
2567 uintptr_t *data = task->taskgroup->reductions;
2568 uintptr_t *d;
2569 struct htab *reduction_htab = (struct htab *) data[5];
2570 size_t i;
2571 for (i = 0; i < cnt; ++i)
2573 hash_entry_type ent, n;
2574 __asm ("" : "=g" (ent) : "0" (ptrs + i));
2575 n = htab_find (reduction_htab, ent);
2576 if (n)
2578 uintptr_t *p;
2579 __asm ("" : "=g" (p) : "0" (n));
2580 /* At this point, p[0] should be equal to (uintptr_t) ptrs[i],
2581 p[1] is the offset within the allocated chunk for each
2582 thread, p[2] is the array registered with
2583 GOMP_taskgroup_reduction_register, d[2] is the base of the
2584 allocated memory and d[1] is the size of the allocated chunk
2585 for one thread. */
2586 d = (uintptr_t *) p[2];
2587 ptrs[i] = (void *) (d[2] + id * d[1] + p[1]);
2588 if (__builtin_expect (i < cntorig, 0))
2589 ptrs[cnt + i] = (void *) p[0];
2590 continue;
2592 d = data;
2593 while (d != NULL)
2595 if ((uintptr_t) ptrs[i] >= d[2] && (uintptr_t) ptrs[i] < d[6])
2596 break;
2597 d = (uintptr_t *) d[4];
2599 if (d == NULL)
2600 gomp_fatal ("couldn't find matching task_reduction or reduction with "
2601 "task modifier for %p", ptrs[i]);
2602 uintptr_t off = ((uintptr_t) ptrs[i] - d[2]) % d[1];
2603 ptrs[i] = (void *) (d[2] + id * d[1] + off);
2604 if (__builtin_expect (i < cntorig, 0))
2606 size_t lo = 0, hi = d[0] - 1;
2607 while (lo <= hi)
2609 size_t m = (lo + hi) / 2;
2610 if (d[7 + 3 * m + 1] < off)
2611 lo = m + 1;
2612 else if (d[7 + 3 * m + 1] == off)
2614 ptrs[cnt + i] = (void *) d[7 + 3 * m];
2615 break;
2617 else
2618 hi = m - 1;
2620 if (lo > hi)
2621 gomp_fatal ("couldn't find matching task_reduction or reduction "
2622 "with task modifier for %p", ptrs[i]);
2627 struct gomp_taskgroup *
2628 gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads)
2630 struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL);
2631 gomp_reduction_register (data, NULL, NULL, nthreads);
2632 taskgroup->reductions = data;
2633 return taskgroup;
2636 void
2637 gomp_workshare_task_reduction_register (uintptr_t *data, uintptr_t *orig)
2639 struct gomp_thread *thr = gomp_thread ();
2640 struct gomp_team *team = thr->ts.team;
2641 struct gomp_task *task = thr->task;
2642 unsigned nthreads = team->nthreads;
2643 gomp_reduction_register (data, task->taskgroup->reductions, orig, nthreads);
2644 task->taskgroup->reductions = data;
2647 void
2648 gomp_workshare_taskgroup_start (void)
2650 struct gomp_thread *thr = gomp_thread ();
2651 struct gomp_team *team = thr->ts.team;
2652 struct gomp_task *task;
2654 if (team == NULL)
2656 gomp_create_artificial_team ();
2657 team = thr->ts.team;
2659 task = thr->task;
2660 task->taskgroup = gomp_taskgroup_init (task->taskgroup);
2661 task->taskgroup->workshare = true;
2664 void
2665 GOMP_workshare_task_reduction_unregister (bool cancelled)
2667 struct gomp_thread *thr = gomp_thread ();
2668 struct gomp_task *task = thr->task;
2669 struct gomp_team *team = thr->ts.team;
2670 uintptr_t *data = task->taskgroup->reductions;
2671 ialias_call (GOMP_taskgroup_end) ();
2672 if (thr->ts.team_id == 0)
2673 ialias_call (GOMP_taskgroup_reduction_unregister) (data);
2674 else
2675 htab_free ((struct htab *) data[5]);
2677 if (!cancelled)
2678 gomp_team_barrier_wait (&team->barrier);
/* Report whether the calling thread is currently executing a task
   whose final_task flag is set.  The result is false when the thread
   has no current task.  */
2682 omp_in_final (void)
2684 struct gomp_thread *thr = gomp_thread ();
2685 return thr->task && thr->task->final_task;
2688 ialias (omp_in_final)
/* Report whether the calling thread is currently executing a task
   whose kind is anything other than GOMP_TASK_IMPLICIT.  The result is
   false when the thread has no current task.  */
2691 omp_in_explicit_task (void)
2693 struct gomp_thread *thr = gomp_thread ();
2694 struct gomp_task *task = thr->task;
2695 return task && task->kind != GOMP_TASK_IMPLICIT;
2698 ialias (omp_in_explicit_task)
/* Fulfil the detach event EVENT.  The handle is used as the address of
   the struct gomp_task it belongs to.

   For a task that is not deferred, the task's completion semaphore is
   posted; it is a fatal error if that semaphore already has a positive
   count.  For a deferred task, the owning team is read from
   task->detach_team; it is a fatal error if that is NULL.  If the task
   is not yet in the GOMP_TASK_DETACHED state, detach_team is cleared
   and nothing else is done.  Otherwise the post-run bookkeeping is
   performed here, threads are woken as needed, and the task is
   finished and freed.  */
2700 void
2701 omp_fulfill_event (omp_event_handle_t event)
2703 struct gomp_task *task = (struct gomp_task *) event;
2704 if (!task->deferred_p)
2706 if (gomp_sem_getcount (task->completion_sem) > 0)
2707 gomp_fatal ("omp_fulfill_event: %p event already fulfilled!\n", task);
2709 gomp_debug (0, "omp_fulfill_event: %p event for undeferred task\n",
2710 task);
2711 gomp_sem_post (task->completion_sem);
2712 return;
2715 struct gomp_team *team = __atomic_load_n (&task->detach_team,
2716 MEMMODEL_RELAXED);
2717 if (!team)
2718 gomp_fatal ("omp_fulfill_event: %p event is invalid or has already "
2719 "been fulfilled!\n", task);
/* Everything from here to the matching unlock runs under the team's
   task lock.  */
2721 gomp_mutex_lock (&team->task_lock);
2722 if (task->kind != GOMP_TASK_DETACHED)
2724 /* The task has not finished running yet. */
2725 gomp_debug (0,
2726 "omp_fulfill_event: %p event fulfilled for unfinished "
2727 "task\n", task);
2728 __atomic_store_n (&task->detach_team, NULL, MEMMODEL_RELAXED);
2729 gomp_mutex_unlock (&team->task_lock);
2730 return;
2733 gomp_debug (0, "omp_fulfill_event: %p event fulfilled for finished task\n",
2734 task);
/* Bookkeeping for a task that has already finished: handle its
   dependences, unlink it from its parent, children and taskgroup, and
   update the team counters.  */
2735 size_t new_tasks = gomp_task_run_post_handle_depend (task, team);
2736 gomp_task_run_post_remove_parent (task);
2737 gomp_clear_parent (&task->children_queue);
2738 gomp_task_run_post_remove_taskgroup (task);
2739 team->task_count--;
2740 team->task_detach_count--;
2742 int do_wake = 0;
/* True when the calling thread belongs to the same team as the task.  */
2743 bool shackled_thread_p = team == gomp_thread ()->ts.team;
2744 if (new_tasks > 0)
2746 /* Wake up threads to run new tasks. */
2747 gomp_team_barrier_set_task_pending (&team->barrier);
2748 do_wake = team->nthreads - team->task_running_count;
2749 if (do_wake > new_tasks)
2750 do_wake = new_tasks;
2753 if (!shackled_thread_p
2754 && !do_wake
2755 && team->task_detach_count == 0
2756 && gomp_team_barrier_waiting_for_tasks (&team->barrier))
2757 /* Ensure that at least one thread is woken up to signal that the
2758 barrier can finish. */
2759 do_wake = 1;
2761 /* If we are running in an unshackled thread, the team might vanish before
2762 gomp_team_barrier_wake is run if we release the lock first, so keep the
2763 lock for the call in that case. */
2764 if (shackled_thread_p)
2765 gomp_mutex_unlock (&team->task_lock);
2766 if (do_wake)
2767 gomp_team_barrier_wake (&team->barrier, do_wake);
2768 if (!shackled_thread_p)
2769 gomp_mutex_unlock (&team->task_lock);
/* The task structure is released only after the lock has been
   dropped.  */
2771 gomp_finish_task (task);
2772 free (task);
2775 ialias (omp_fulfill_event)