libgomp/task.c (official-gcc.git)
1 /* Copyright (C) 2007-2022 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file handles the maintenance of tasks in response to task
27 creation and termination. */
29 #include "libgomp.h"
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33 #include "gomp-constants.h"
35 typedef struct gomp_task_depend_entry *hash_entry_type;
37 static inline void *
38 htab_alloc (size_t size)
40 return gomp_malloc (size);
43 static inline void
44 htab_free (void *ptr)
46 free (ptr);
49 #include "hashtab.h"
51 static inline hashval_t
52 htab_hash (hash_entry_type element)
54 return hash_pointer (element->addr);
57 static inline bool
58 htab_eq (hash_entry_type x, hash_entry_type y)
60 return x->addr == y->addr;
63 /* Create a new task data structure. */
65 void
66 gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
67 struct gomp_task_icv *prev_icv)
69 /* It would seem that using memset here would be a win, but it turns
70 out that partially filling gomp_task allows us to keep the
71 overhead of task creation low. In the nqueens-1.c test, for a
72 sufficiently large N, we drop the overhead from 5-6% to 1%.
74 Note, the nqueens-1.c test in serial mode is a good test to
75 benchmark the overhead of creating tasks as there are millions of
76 tiny tasks created that all run undeferred. */
77 task->parent = parent_task;
78 priority_queue_init (&task->children_queue);
79 task->taskgroup = NULL;
80 task->dependers = NULL;
81 task->depend_hash = NULL;
82 task->taskwait = NULL;
83 task->depend_all_memory = NULL;
84 task->depend_count = 0;
85 task->completion_sem = NULL;
86 task->deferred_p = false;
87 task->icv = *prev_icv;
88 task->kind = GOMP_TASK_IMPLICIT;
89 task->in_tied_task = false;
90 task->final_task = false;
91 task->copy_ctors_done = false;
92 task->parent_depends_on = false;
95 /* Clean up a task, after completing it. */
97 void
98 gomp_end_task (void)
100 struct gomp_thread *thr = gomp_thread ();
101 struct gomp_task *task = thr->task;
103 gomp_finish_task (task);
104 thr->task = task->parent;
107 /* Clear the parent field of every task in LIST. */
109 static inline void
110 gomp_clear_parent_in_list (struct priority_list *list)
112 struct priority_node *p = list->tasks;
113 if (p)
116 priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
117 p = p->next;
119 while (p != list->tasks);
122 /* Splay tree version of gomp_clear_parent_in_list.
124 Clear the parent field of every task in NODE within SP, and free
125 the node when done. */
127 static void
128 gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
130 if (!node)
131 return;
132 prio_splay_tree_node left = node->left, right = node->right;
133 gomp_clear_parent_in_list (&node->key.l);
134 #if _LIBGOMP_CHECKING_
135 memset (node, 0xaf, sizeof (*node));
136 #endif
137 /* No need to remove the node from the tree. We're nuking
138 everything, so just free the nodes and our caller can clear the
139 entire splay tree. */
140 free (node);
141 gomp_clear_parent_in_tree (sp, left);
142 gomp_clear_parent_in_tree (sp, right);
145 /* Clear the parent field of every task in Q and remove every task
146 from Q. */
148 static inline void
149 gomp_clear_parent (struct priority_queue *q)
151 if (priority_queue_multi_p (q))
153 gomp_clear_parent_in_tree (&q->t, q->t.root);
154 /* All the nodes have been cleared in gomp_clear_parent_in_tree.
155 No need to remove anything. We can just nuke everything. */
156 q->t.root = NULL;
158 else
159 gomp_clear_parent_in_list (&q->l);
162 /* Helper function for GOMP_task and gomp_create_target_task.
164 For a TASK with in/out dependencies, fill in the various dependency
165 queues. PARENT is the parent of said task. DEPEND is as in
166 GOMP_task. */
168 static void
169 gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
170 void **depend)
172 size_t ndepend = (uintptr_t) depend[0];
173 size_t i;
174 hash_entry_type ent;
175 bool all_memory = false;
177 if (ndepend)
179 /* depend[0] is total # */
180 size_t nout = (uintptr_t) depend[1]; /* # of out: and inout: */
181 /* ndepend - nout is # of in: */
182 for (i = 0; i < ndepend; i++)
184 task->depend[i].addr = depend[2 + i];
185 task->depend[i].is_in = i >= nout;
186 all_memory |= i < nout && depend[2 + i] == NULL;
189 else
191 ndepend = (uintptr_t) depend[1]; /* total # */
192 size_t nout = (uintptr_t) depend[2]; /* # of out: and inout: */
193 size_t nmutexinoutset = (uintptr_t) depend[3]; /* # of mutexinoutset: */
194 /* For now we treat mutexinoutset like out, which is compliant, but
195 inefficient. */
196 size_t nin = (uintptr_t) depend[4]; /* # of in: */
197 /* ndepend - nout - nmutexinoutset - nin is # of depobjs */
198 size_t normal = nout + nmutexinoutset + nin;
199 size_t n = 0;
200 bool has_in = false;
201 for (i = normal; i < ndepend; i++)
203 void **d = (void **) (uintptr_t) depend[5 + i];
204 switch ((uintptr_t) d[1])
206 case GOMP_DEPEND_OUT:
207 case GOMP_DEPEND_INOUT:
208 all_memory |= d[0] == NULL;
209 break;
210 case GOMP_DEPEND_MUTEXINOUTSET:
211 break;
212 case GOMP_DEPEND_IN:
213 case GOMP_DEPEND_INOUTSET:
214 has_in = true;
215 continue;
216 default:
217 gomp_fatal ("unknown omp_depend_t dependence type %d",
218 (int) (uintptr_t) d[1]);
220 task->depend[n].addr = d[0];
221 task->depend[n++].is_in = 0;
223 for (i = 0; i < normal; i++)
225 task->depend[n].addr = depend[5 + i];
226 task->depend[n++].is_in = i >= nout + nmutexinoutset;
228 if (has_in)
229 for (i = normal; i < ndepend; i++)
231 void **d = (void **) (uintptr_t) depend[5 + i];
232 if ((uintptr_t) d[1] != GOMP_DEPEND_IN
233 && (uintptr_t) d[1] != GOMP_DEPEND_INOUTSET)
234 continue;
235 task->depend[n].addr = d[0];
236 task->depend[n++].is_in
237 = 1 + ((uintptr_t) d[1] == GOMP_DEPEND_INOUTSET);
240 task->num_dependees = 0;
241 if (__builtin_expect (parent->depend_all_memory && ndepend, false))
243 struct gomp_task *tsk = parent->depend_all_memory;
244 if (tsk->dependers == NULL)
246 tsk->dependers
247 = gomp_malloc (sizeof (struct gomp_dependers_vec)
248 + 6 * sizeof (struct gomp_task *));
249 tsk->dependers->n_elem = 1;
250 tsk->dependers->allocated = 6;
251 tsk->dependers->elem[0] = task;
253 else
255 if (tsk->dependers->n_elem == tsk->dependers->allocated)
257 tsk->dependers->allocated
258 = tsk->dependers->allocated * 2 + 2;
259 tsk->dependers
260 = gomp_realloc (tsk->dependers,
261 sizeof (struct gomp_dependers_vec)
262 + (tsk->dependers->allocated
263 * sizeof (struct gomp_task *)));
265 tsk->dependers->elem[tsk->dependers->n_elem++] = task;
267 task->num_dependees++;
269 if (__builtin_expect (all_memory, false))
271 /* A task with depend(inout: omp_all_memory) depends on all previous
272 sibling tasks which have any dependencies and all later sibling
273 tasks which have any dependencies depend on it. */
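/* For illustration, a sketch of the user-level construct handled here:

     #pragma omp task depend(inout: omp_all_memory)

   Such a task is ordered after every earlier sibling task that has any
   depend clause, and every later sibling task with a depend clause is
   ordered after it.  */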
274 task->depend_count = 1;
275 task->depend[0].addr = NULL;
276 task->depend[0].next = NULL;
277 task->depend[0].prev = NULL;
278 task->depend[0].task = task;
279 task->depend[0].redundant = true;
280 task->depend[0].redundant_out = false;
281 if (parent->depend_hash)
283 /* Inlined htab_traverse + htab_clear. All newer siblings can
284 just depend on this task. Add dependencies on all previous
285 sibling tasks with dependencies and make them redundant and
286 clear the hash table. */
287 hash_entry_type *slot = &parent->depend_hash->entries[0];
288 hash_entry_type *end = slot + htab_size (parent->depend_hash);
289 for (; slot != end; ++slot)
291 if (*slot == HTAB_EMPTY_ENTRY)
292 continue;
293 if (*slot != HTAB_DELETED_ENTRY)
295 for (ent = *slot; ent; ent = ent->next)
297 struct gomp_task *tsk = ent->task;
299 if (ent->redundant_out)
300 break;
302 ent->redundant = true;
303 if (tsk->dependers == NULL)
305 tsk->dependers
306 = gomp_malloc (sizeof (struct gomp_dependers_vec)
307 + 6 * sizeof (struct gomp_task *));
308 tsk->dependers->n_elem = 1;
309 tsk->dependers->allocated = 6;
310 tsk->dependers->elem[0] = task;
311 task->num_dependees++;
312 continue;
314 /* We already have some other dependency on tsk from
315 an earlier depend clause. */
316 else if (tsk->dependers->n_elem
317 && (tsk->dependers->elem[tsk->dependers->n_elem
318 - 1] == task))
319 continue;
320 else if (tsk->dependers->n_elem
321 == tsk->dependers->allocated)
323 tsk->dependers->allocated
324 = tsk->dependers->allocated * 2 + 2;
325 tsk->dependers
326 = gomp_realloc (tsk->dependers,
327 sizeof (struct gomp_dependers_vec)
328 + (tsk->dependers->allocated
329 * sizeof (struct gomp_task *)));
331 tsk->dependers->elem[tsk->dependers->n_elem++] = task;
332 task->num_dependees++;
334 while (ent)
336 ent->redundant = true;
337 ent = ent->next;
340 *slot = HTAB_EMPTY_ENTRY;
342 if (htab_size (parent->depend_hash) <= 32)
344 parent->depend_hash->n_elements = 0;
345 parent->depend_hash->n_deleted = 0;
347 else
349 /* Shrink the hash table if it would be too large.
350 We don't want to walk e.g. megabytes of empty hash
351 table for every depend(inout: omp_all_memory). */
352 free (parent->depend_hash);
353 parent->depend_hash = htab_create (12);
356 parent->depend_all_memory = task;
357 return;
359 task->depend_count = ndepend;
360 if (parent->depend_hash == NULL)
361 parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
362 for (i = 0; i < ndepend; i++)
364 task->depend[i].next = NULL;
365 task->depend[i].prev = NULL;
366 task->depend[i].task = task;
367 task->depend[i].redundant = false;
368 task->depend[i].redundant_out = false;
370 hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
371 &task->depend[i], INSERT);
372 hash_entry_type out = NULL, last = NULL;
373 if (*slot)
375 /* If multiple depends on the same task are the same, all but the
376 first one are redundant. As inout/out come first, if any of them
377 is inout/out, it will win, which is the right semantics. */
378 if ((*slot)->task == task)
380 task->depend[i].redundant = true;
381 continue;
383 for (ent = *slot; ent; ent = ent->next)
385 if (ent->redundant_out)
386 break;
388 last = ent;
390 /* depend(in:...) doesn't depend on earlier depend(in:...).
391 Similarly depend(inoutset:...) doesn't depend on earlier
392 depend(inoutset:...). */
393 if (task->depend[i].is_in && task->depend[i].is_in == ent->is_in)
394 continue;
396 if (!ent->is_in)
397 out = ent;
399 struct gomp_task *tsk = ent->task;
400 if (tsk->dependers == NULL)
402 tsk->dependers
403 = gomp_malloc (sizeof (struct gomp_dependers_vec)
404 + 6 * sizeof (struct gomp_task *));
405 tsk->dependers->n_elem = 1;
406 tsk->dependers->allocated = 6;
407 tsk->dependers->elem[0] = task;
408 task->num_dependees++;
409 continue;
411 /* We already have some other dependency on tsk from an earlier
412 depend clause. */
413 else if (tsk->dependers->n_elem
414 && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
415 == task))
416 continue;
417 else if (tsk->dependers->n_elem == tsk->dependers->allocated)
419 tsk->dependers->allocated
420 = tsk->dependers->allocated * 2 + 2;
421 tsk->dependers
422 = gomp_realloc (tsk->dependers,
423 sizeof (struct gomp_dependers_vec)
424 + (tsk->dependers->allocated
425 * sizeof (struct gomp_task *)));
427 tsk->dependers->elem[tsk->dependers->n_elem++] = task;
428 task->num_dependees++;
430 task->depend[i].next = *slot;
431 (*slot)->prev = &task->depend[i];
433 *slot = &task->depend[i];
435 /* There is no need to store more than one depend({,in}out:) task per
436 address in the hash table chain for the purpose of creation of
437 deferred tasks, because each out depends on all earlier outs, thus it
438 is enough to record just the last depend({,in}out:). For depend(in:),
439 we need to keep all of the previous ones not terminated yet, because
440 a later depend({,in}out:) might need to depend on all of them. So, if
441 the new task's clause is depend({,in}out:), we know there is at most
442 one other depend({,in}out:) clause in the list (out). For
443 non-deferred tasks we want to see all outs, so they are moved to the
444 end of the chain, after first redundant_out entry all following
445 entries should be redundant_out. */
446 if (!task->depend[i].is_in && out)
448 if (out != last)
450 out->next->prev = out->prev;
451 out->prev->next = out->next;
452 out->next = last->next;
453 out->prev = last;
454 last->next = out;
455 if (out->next)
456 out->next->prev = out;
458 out->redundant_out = true;
463 /* Called when encountering an explicit task directive. If IF_CLAUSE is
464 false, then we must not delay in executing the task. If UNTIED is true,
465 then the task may be executed by any member of the team.
467 DEPEND is an array containing:
468 if depend[0] is non-zero, then:
469 depend[0]: number of depend elements.
470 depend[1]: number of depend elements of type "out/inout".
471 depend[2..N+1]: address of [1..N]th depend element.
472 otherwise, when depend[0] is zero, then:
473 depend[1]: number of depend elements.
474 depend[2]: number of depend elements of type "out/inout".
475 depend[3]: number of depend elements of type "mutexinoutset".
476 depend[4]: number of depend elements of type "in".
477 depend[5..4+depend[2]+depend[3]+depend[4]]: address of depend elements
478 depend[5+depend[2]+depend[3]+depend[4]..4+depend[1]]: address of
479 omp_depend_t objects. */
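/* For illustration, a sketch of the first encoding above (depend[0]
   non-zero): a task with depend(out: x) depend(in: y, z) would receive
   roughly

     void *depend[] = { (void *) 3, (void *) 1, &x, &y, &z };

   i.e. three depend elements in total, one of them out/inout, with the
   out/inout addresses listed before the in addresses.  Here x, y and z
   are hypothetical variables; the array is normally built by the
   compiler when it expands the task directive.  */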
481 void
482 GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
483 long arg_size, long arg_align, bool if_clause, unsigned flags,
484 void **depend, int priority_arg, void *detach)
486 struct gomp_thread *thr = gomp_thread ();
487 struct gomp_team *team = thr->ts.team;
488 int priority = 0;
490 #ifdef HAVE_BROKEN_POSIX_SEMAPHORES
491 /* If pthread_mutex_* is used for omp_*lock*, then each task must be
492 tied to one thread all the time. This means UNTIED tasks must be
493 tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
494 might be running on a different thread than FN. */
495 if (cpyfn)
496 if_clause = false;
497 flags &= ~GOMP_TASK_FLAG_UNTIED;
498 #endif
500 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
501 if (__builtin_expect (gomp_cancel_var, 0) && team)
503 if (gomp_team_barrier_cancelled (&team->barrier))
504 return;
505 if (thr->task->taskgroup)
507 if (thr->task->taskgroup->cancelled)
508 return;
509 if (thr->task->taskgroup->workshare
510 && thr->task->taskgroup->prev
511 && thr->task->taskgroup->prev->cancelled)
512 return;
516 if (__builtin_expect ((flags & GOMP_TASK_FLAG_PRIORITY) != 0, 0))
518 priority = priority_arg;
519 if (priority > gomp_max_task_priority_var)
520 priority = gomp_max_task_priority_var;
523 if (!if_clause || team == NULL
524 || (thr->task && thr->task->final_task)
525 || team->task_count > 64 * team->nthreads)
527 struct gomp_task task;
528 gomp_sem_t completion_sem;
530 /* If there are depend clauses and earlier deferred sibling tasks
531 with depend clauses, check whether there is a dependency. If there
532 is, we need to wait for those tasks. There is no need to handle
533 depend clauses for non-deferred tasks other than this, because
534 the parent task is suspended until the child task finishes and thus
535 it can't start further child tasks. */
536 if ((flags & GOMP_TASK_FLAG_DEPEND)
537 && thr->task && thr->task->depend_hash)
538 gomp_task_maybe_wait_for_dependencies (depend);
540 gomp_init_task (&task, thr->task, gomp_icv (false));
541 task.kind = GOMP_TASK_UNDEFERRED;
542 task.final_task = (thr->task && thr->task->final_task)
543 || (flags & GOMP_TASK_FLAG_FINAL);
544 task.priority = priority;
546 if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
548 gomp_sem_init (&completion_sem, 0);
549 task.completion_sem = &completion_sem;
550 *(void **) detach = &task;
551 if (data)
552 *(void **) data = &task;
554 gomp_debug (0, "Thread %d: new event: %p\n",
555 thr->ts.team_id, &task);
558 if (thr->task)
560 task.in_tied_task = thr->task->in_tied_task;
561 task.taskgroup = thr->task->taskgroup;
563 thr->task = &task;
564 if (__builtin_expect (cpyfn != NULL, 0))
566 char buf[arg_size + arg_align - 1];
567 char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
568 & ~(uintptr_t) (arg_align - 1));
569 cpyfn (arg, data);
570 fn (arg);
572 else
573 fn (data);
575 if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
577 gomp_sem_wait (&completion_sem);
578 gomp_sem_destroy (&completion_sem);
581 /* Access to "children" is normally done inside a task_lock
582 mutex region, but the only way this particular task.children
583 can be set is if this thread's task work function (fn)
584 creates children. So since the setter is *this* thread, we
585 need no barriers here when testing for non-NULL. We can have
586 task.children set by the current thread then changed by a
587 child thread, but seeing a stale non-NULL value is not a
588 problem. Once past the task_lock acquisition, this thread
589 will see the real value of task.children. */
590 if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
592 gomp_mutex_lock (&team->task_lock);
593 gomp_clear_parent (&task.children_queue);
594 gomp_mutex_unlock (&team->task_lock);
596 gomp_end_task ();
598 else
600 struct gomp_task *task;
601 struct gomp_task *parent = thr->task;
602 struct gomp_taskgroup *taskgroup = parent->taskgroup;
603 char *arg;
604 bool do_wake;
605 size_t depend_size = 0;
607 if (flags & GOMP_TASK_FLAG_DEPEND)
608 depend_size = ((uintptr_t) (depend[0] ? depend[0] : depend[1])
609 * sizeof (struct gomp_task_depend_entry));
610 task = gomp_malloc (sizeof (*task) + depend_size
611 + arg_size + arg_align - 1);
612 arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
613 & ~(uintptr_t) (arg_align - 1));
614 gomp_init_task (task, parent, gomp_icv (false));
615 task->priority = priority;
616 task->kind = GOMP_TASK_UNDEFERRED;
617 task->in_tied_task = parent->in_tied_task;
618 task->taskgroup = taskgroup;
619 task->deferred_p = true;
620 if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
622 task->detach_team = team;
624 *(void **) detach = task;
625 if (data)
626 *(void **) data = task;
628 gomp_debug (0, "Thread %d: new event: %p\n", thr->ts.team_id, task);
630 thr->task = task;
631 if (cpyfn)
633 cpyfn (arg, data);
634 task->copy_ctors_done = true;
636 else
637 memcpy (arg, data, arg_size);
638 thr->task = parent;
639 task->kind = GOMP_TASK_WAITING;
640 task->fn = fn;
641 task->fn_data = arg;
642 task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
643 gomp_mutex_lock (&team->task_lock);
644 /* If parallel or taskgroup has been cancelled, don't start new
645 tasks. */
646 if (__builtin_expect (gomp_cancel_var, 0)
647 && !task->copy_ctors_done)
649 if (gomp_team_barrier_cancelled (&team->barrier))
651 do_cancel:
652 gomp_mutex_unlock (&team->task_lock);
653 gomp_finish_task (task);
654 free (task);
655 return;
657 if (taskgroup)
659 if (taskgroup->cancelled)
660 goto do_cancel;
661 if (taskgroup->workshare
662 && taskgroup->prev
663 && taskgroup->prev->cancelled)
664 goto do_cancel;
667 if (taskgroup)
668 taskgroup->num_children++;
669 if (depend_size)
671 gomp_task_handle_depend (task, parent, depend);
672 if (task->num_dependees)
674 /* Tasks that depend on other tasks are not put into the
675 various waiting queues, so we are done for now. Said
676 tasks are instead put into the queues via
677 gomp_task_run_post_handle_dependers() after their
678 dependencies have been satisfied. After which, they
679 can be picked up by the various scheduling
680 points. */
681 gomp_mutex_unlock (&team->task_lock);
682 return;
686 priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
687 task, priority,
688 PRIORITY_INSERT_BEGIN,
689 /*adjust_parent_depends_on=*/false,
690 task->parent_depends_on);
691 if (taskgroup)
692 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
693 task, priority,
694 PRIORITY_INSERT_BEGIN,
695 /*adjust_parent_depends_on=*/false,
696 task->parent_depends_on);
698 priority_queue_insert (PQ_TEAM, &team->task_queue,
699 task, priority,
700 PRIORITY_INSERT_END,
701 /*adjust_parent_depends_on=*/false,
702 task->parent_depends_on);
704 ++team->task_count;
705 ++team->task_queued_count;
706 gomp_team_barrier_set_task_pending (&team->barrier);
707 do_wake = team->task_running_count + !parent->in_tied_task
708 < team->nthreads;
709 gomp_mutex_unlock (&team->task_lock);
710 if (do_wake)
711 gomp_team_barrier_wake (&team->barrier, 1);
715 ialias (GOMP_taskgroup_start)
716 ialias (GOMP_taskgroup_end)
717 ialias (GOMP_taskgroup_reduction_register)
719 #define TYPE long
720 #define UTYPE unsigned long
721 #define TYPE_is_long 1
722 #include "taskloop.c"
723 #undef TYPE
724 #undef UTYPE
725 #undef TYPE_is_long
727 #define TYPE unsigned long long
728 #define UTYPE TYPE
729 #define GOMP_taskloop GOMP_taskloop_ull
730 #include "taskloop.c"
731 #undef TYPE
732 #undef UTYPE
733 #undef GOMP_taskloop
735 static void inline
736 priority_queue_move_task_first (enum priority_queue_type type,
737 struct priority_queue *head,
738 struct gomp_task *task)
740 #if _LIBGOMP_CHECKING_
741 if (!priority_queue_task_in_queue_p (type, head, task))
742 gomp_fatal ("Attempt to move first missing task %p", task);
743 #endif
744 struct priority_list *list;
745 if (priority_queue_multi_p (head))
747 list = priority_queue_lookup_priority (head, task->priority);
748 #if _LIBGOMP_CHECKING_
749 if (!list)
750 gomp_fatal ("Unable to find priority %d", task->priority);
751 #endif
753 else
754 list = &head->l;
755 priority_list_remove (list, task_to_priority_node (type, task), 0);
756 priority_list_insert (type, list, task, task->priority,
757 PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
758 task->parent_depends_on);
761 /* Actual body of GOMP_PLUGIN_target_task_completion that is executed
762 with team->task_lock held, or is executed in the thread that called
763 gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
764 run before it acquires team->task_lock. */
766 static void
767 gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
769 struct gomp_task *parent = task->parent;
770 if (parent)
771 priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
772 task);
774 struct gomp_taskgroup *taskgroup = task->taskgroup;
775 if (taskgroup)
776 priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
777 task);
779 priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
780 PRIORITY_INSERT_BEGIN, false,
781 task->parent_depends_on);
782 task->kind = GOMP_TASK_WAITING;
783 if (parent && parent->taskwait)
785 if (parent->taskwait->in_taskwait)
787 /* One more task has had its dependencies met.
788 Inform any waiters. */
789 parent->taskwait->in_taskwait = false;
790 gomp_sem_post (&parent->taskwait->taskwait_sem);
792 else if (parent->taskwait->in_depend_wait)
794 /* One more task has had its dependencies met.
795 Inform any waiters. */
796 parent->taskwait->in_depend_wait = false;
797 gomp_sem_post (&parent->taskwait->taskwait_sem);
800 if (taskgroup && taskgroup->in_taskgroup_wait)
802 /* One more task has had its dependencies met.
803 Inform any waiters. */
804 taskgroup->in_taskgroup_wait = false;
805 gomp_sem_post (&taskgroup->taskgroup_sem);
808 ++team->task_queued_count;
809 gomp_team_barrier_set_task_pending (&team->barrier);
810 /* I'm afraid this can't be done after releasing team->task_lock,
811 as gomp_target_task_completion is run from an unrelated thread and
812 therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
813 the team could be gone already. */
814 if (team->nthreads > team->task_running_count)
815 gomp_team_barrier_wake (&team->barrier, 1);
818 /* Signal that a target task TTASK has completed the asynchronously
819 running phase and should be requeued as a task to handle the
820 variable unmapping. */
822 void
823 GOMP_PLUGIN_target_task_completion (void *data)
825 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
826 struct gomp_task *task = ttask->task;
827 struct gomp_team *team = ttask->team;
829 gomp_mutex_lock (&team->task_lock);
830 if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN)
832 ttask->state = GOMP_TARGET_TASK_FINISHED;
833 gomp_mutex_unlock (&team->task_lock);
834 return;
836 ttask->state = GOMP_TARGET_TASK_FINISHED;
837 gomp_target_task_completion (team, task);
838 gomp_mutex_unlock (&team->task_lock);
841 static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);
843 /* Called for nowait target tasks. */
845 bool
846 gomp_create_target_task (struct gomp_device_descr *devicep,
847 void (*fn) (void *), size_t mapnum, void **hostaddrs,
848 size_t *sizes, unsigned short *kinds,
849 unsigned int flags, void **depend, void **args,
850 enum gomp_target_task_state state)
852 struct gomp_thread *thr = gomp_thread ();
853 struct gomp_team *team = thr->ts.team;
855 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
856 if (__builtin_expect (gomp_cancel_var, 0) && team)
858 if (gomp_team_barrier_cancelled (&team->barrier))
859 return true;
860 if (thr->task->taskgroup)
862 if (thr->task->taskgroup->cancelled)
863 return true;
864 if (thr->task->taskgroup->workshare
865 && thr->task->taskgroup->prev
866 && thr->task->taskgroup->prev->cancelled)
867 return true;
871 struct gomp_target_task *ttask;
872 struct gomp_task *task;
873 struct gomp_task *parent = thr->task;
874 struct gomp_taskgroup *taskgroup = parent->taskgroup;
875 bool do_wake;
876 size_t depend_size = 0;
877 uintptr_t depend_cnt = 0;
878 size_t tgt_align = 0, tgt_size = 0;
879 uintptr_t args_cnt = 0;
881 if (depend != NULL)
883 depend_cnt = (uintptr_t) (depend[0] ? depend[0] : depend[1]);
884 depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
886 if (fn)
888 /* GOMP_MAP_FIRSTPRIVATE mappings need to be copied first, as they are
889 firstprivate on the target task. */
890 size_t i;
891 for (i = 0; i < mapnum; i++)
892 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
894 size_t align = (size_t) 1 << (kinds[i] >> 8);
895 if (tgt_align < align)
896 tgt_align = align;
897 tgt_size = (tgt_size + align - 1) & ~(align - 1);
898 tgt_size += sizes[i];
900 if (tgt_align)
901 tgt_size += tgt_align - 1;
902 else
903 tgt_size = 0;
904 if (args)
906 void **cargs = args;
907 while (*cargs)
909 intptr_t id = (intptr_t) *cargs++;
910 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
911 cargs++;
913 args_cnt = cargs + 1 - args;
917 task = gomp_malloc (sizeof (*task) + depend_size
918 + sizeof (*ttask)
919 + args_cnt * sizeof (void *)
920 + mapnum * (sizeof (void *) + sizeof (size_t)
921 + sizeof (unsigned short))
922 + tgt_size);
923 gomp_init_task (task, parent, gomp_icv (false));
924 task->priority = 0;
925 task->kind = GOMP_TASK_WAITING;
926 task->in_tied_task = parent->in_tied_task;
927 task->taskgroup = taskgroup;
928 ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
929 ttask->devicep = devicep;
930 ttask->fn = fn;
931 ttask->mapnum = mapnum;
932 memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
933 if (args_cnt)
935 ttask->args = (void **) &ttask->hostaddrs[mapnum];
936 memcpy (ttask->args, args, args_cnt * sizeof (void *));
937 ttask->sizes = (size_t *) &ttask->args[args_cnt];
939 else
941 ttask->args = args;
942 ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
944 memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
945 ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
946 memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
947 if (tgt_align)
949 char *tgt = (char *) &ttask->kinds[mapnum];
950 size_t i;
951 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
952 if (al)
953 tgt += tgt_align - al;
954 tgt_size = 0;
955 for (i = 0; i < mapnum; i++)
956 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
958 size_t align = (size_t) 1 << (kinds[i] >> 8);
959 tgt_size = (tgt_size + align - 1) & ~(align - 1);
960 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
961 ttask->hostaddrs[i] = tgt + tgt_size;
962 tgt_size = tgt_size + sizes[i];
965 ttask->flags = flags;
966 ttask->state = state;
967 ttask->task = task;
968 ttask->team = team;
969 task->fn = NULL;
970 task->fn_data = ttask;
971 task->final_task = 0;
972 gomp_mutex_lock (&team->task_lock);
973 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
974 if (__builtin_expect (gomp_cancel_var, 0))
976 if (gomp_team_barrier_cancelled (&team->barrier))
978 do_cancel:
979 gomp_mutex_unlock (&team->task_lock);
980 gomp_finish_task (task);
981 free (task);
982 return true;
984 if (taskgroup)
986 if (taskgroup->cancelled)
987 goto do_cancel;
988 if (taskgroup->workshare
989 && taskgroup->prev
990 && taskgroup->prev->cancelled)
991 goto do_cancel;
994 if (depend_size)
996 gomp_task_handle_depend (task, parent, depend);
997 if (task->num_dependees)
999 if (taskgroup)
1000 taskgroup->num_children++;
1001 gomp_mutex_unlock (&team->task_lock);
1002 return true;
1005 if (state == GOMP_TARGET_TASK_DATA)
1007 gomp_task_run_post_handle_depend_hash (task);
1008 gomp_mutex_unlock (&team->task_lock);
1009 gomp_finish_task (task);
1010 free (task);
1011 return false;
1013 if (taskgroup)
1014 taskgroup->num_children++;
1015 /* For async offloading, if we don't need to wait for dependencies,
1016 run the gomp_target_task_fn right away, essentially scheduling the
1017 mapping part of the task in the current thread. */
1018 if (devicep != NULL
1019 && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1021 priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
1022 PRIORITY_INSERT_END,
1023 /*adjust_parent_depends_on=*/false,
1024 task->parent_depends_on);
1025 if (taskgroup)
1026 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1027 task, 0, PRIORITY_INSERT_END,
1028 /*adjust_parent_depends_on=*/false,
1029 task->parent_depends_on);
1030 task->pnode[PQ_TEAM].next = NULL;
1031 task->pnode[PQ_TEAM].prev = NULL;
1032 task->kind = GOMP_TASK_TIED;
1033 ++team->task_count;
1034 gomp_mutex_unlock (&team->task_lock);
1036 thr->task = task;
1037 gomp_target_task_fn (task->fn_data);
1038 thr->task = parent;
1040 gomp_mutex_lock (&team->task_lock);
1041 task->kind = GOMP_TASK_ASYNC_RUNNING;
1042 /* If GOMP_PLUGIN_target_task_completion has run already
1043 in between gomp_target_task_fn and the mutex lock,
1044 perform the requeuing here. */
1045 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1046 gomp_target_task_completion (team, task);
1047 else
1048 ttask->state = GOMP_TARGET_TASK_RUNNING;
1049 gomp_mutex_unlock (&team->task_lock);
1050 return true;
1052 priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
1053 PRIORITY_INSERT_BEGIN,
1054 /*adjust_parent_depends_on=*/false,
1055 task->parent_depends_on);
1056 if (taskgroup)
1057 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
1058 PRIORITY_INSERT_BEGIN,
1059 /*adjust_parent_depends_on=*/false,
1060 task->parent_depends_on);
1061 priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
1062 PRIORITY_INSERT_END,
1063 /*adjust_parent_depends_on=*/false,
1064 task->parent_depends_on);
1065 ++team->task_count;
1066 ++team->task_queued_count;
1067 gomp_team_barrier_set_task_pending (&team->barrier);
1068 do_wake = team->task_running_count + !parent->in_tied_task
1069 < team->nthreads;
1070 gomp_mutex_unlock (&team->task_lock);
1071 if (do_wake)
1072 gomp_team_barrier_wake (&team->barrier, 1);
1073 return true;
1076 /* Given a parent_depends_on task in LIST, move it to the front of its
1077 priority so it is run as soon as possible.
1079 Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.
1081 We rearrange the queue such that all parent_depends_on tasks are
1082 first, and last_parent_depends_on points to the last such task we
1083 rearranged. For example, given the following tasks in a queue
1084 where PD[123] are the parent_depends_on tasks:
1086 task->children
1087 |
1088 V
1089 C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4
1091 We rearrange such that:
1093 task->children
1094 |              +--- last_parent_depends_on
1095 |              |
1096 V              V
1097 PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4. */
1099 static void inline
1100 priority_list_upgrade_task (struct priority_list *list,
1101 struct priority_node *node)
1103 struct priority_node *last_parent_depends_on
1104 = list->last_parent_depends_on;
1105 if (last_parent_depends_on)
1107 node->prev->next = node->next;
1108 node->next->prev = node->prev;
1109 node->prev = last_parent_depends_on;
1110 node->next = last_parent_depends_on->next;
1111 node->prev->next = node;
1112 node->next->prev = node;
1114 else if (node != list->tasks)
1116 node->prev->next = node->next;
1117 node->next->prev = node->prev;
1118 node->prev = list->tasks->prev;
1119 node->next = list->tasks;
1120 list->tasks = node;
1121 node->prev->next = node;
1122 node->next->prev = node;
1124 list->last_parent_depends_on = node;
1127 /* Given a parent_depends_on TASK in its parent's children_queue, move
1128 it to the front of its priority so it is run as soon as possible.
1130 PARENT is passed as an optimization.
1132 (This function could be defined in priority_queue.c, but we want it
1133 inlined, and putting it in priority_queue.h is not an option, given
1134 that gomp_task has not been properly defined at that point). */
1136 static void inline
1137 priority_queue_upgrade_task (struct gomp_task *task,
1138 struct gomp_task *parent)
1140 struct priority_queue *head = &parent->children_queue;
1141 struct priority_node *node = &task->pnode[PQ_CHILDREN];
1142 #if _LIBGOMP_CHECKING_
1143 if (!task->parent_depends_on)
1144 gomp_fatal ("priority_queue_upgrade_task: task must be a "
1145 "parent_depends_on task");
1146 if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
1147 gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
1148 #endif
1149 if (priority_queue_multi_p (head))
1151 struct priority_list *list
1152 = priority_queue_lookup_priority (head, task->priority);
1153 priority_list_upgrade_task (list, node);
1155 else
1156 priority_list_upgrade_task (&head->l, node);
1159 /* Given a CHILD_TASK in LIST that is about to be executed, move it out of
1160 the way in LIST so that other tasks can be considered for
1161 execution. LIST contains tasks of type TYPE.
1163 Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
1164 if applicable. */
1166 static void inline
1167 priority_list_downgrade_task (enum priority_queue_type type,
1168 struct priority_list *list,
1169 struct gomp_task *child_task)
1171 struct priority_node *node = task_to_priority_node (type, child_task);
1172 if (list->tasks == node)
1173 list->tasks = node->next;
1174 else if (node->next != list->tasks)
1176 /* The task in NODE is about to become TIED and TIED tasks
1177 cannot come before WAITING tasks. If we're about to
1178 leave the queue in such an indeterminate state, rewire
1179 things appropriately. However, a TIED task at the end is
1180 perfectly fine. */
1181 struct gomp_task *next_task = priority_node_to_task (type, node->next);
1182 if (next_task->kind == GOMP_TASK_WAITING)
1184 /* Remove from list. */
1185 node->prev->next = node->next;
1186 node->next->prev = node->prev;
1187 /* Rewire at the end. */
1188 node->next = list->tasks;
1189 node->prev = list->tasks->prev;
1190 list->tasks->prev->next = node;
1191 list->tasks->prev = node;
1195 /* If the current task is the last_parent_depends_on for its
1196 priority, adjust last_parent_depends_on appropriately. */
1197 if (__builtin_expect (child_task->parent_depends_on, 0)
1198 && list->last_parent_depends_on == node)
1200 struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
1201 if (node->prev != node
1202 && prev_child->kind == GOMP_TASK_WAITING
1203 && prev_child->parent_depends_on)
1204 list->last_parent_depends_on = node->prev;
1205 else
1207 /* There are no more parent_depends_on entries waiting
1208 to run, clear the list. */
1209 list->last_parent_depends_on = NULL;
1214 /* Given a TASK in HEAD that is about to be executed, move it out of
1215 the way so that other tasks can be considered for execution. HEAD
1216 contains tasks of type TYPE.
1218 Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
1219 if applicable.
1221 (This function could be defined in priority_queue.c, but we want it
1222 inlined, and putting it in priority_queue.h is not an option, given
1223 that gomp_task has not been properly defined at that point). */
1225 static void inline
1226 priority_queue_downgrade_task (enum priority_queue_type type,
1227 struct priority_queue *head,
1228 struct gomp_task *task)
1230 #if _LIBGOMP_CHECKING_
1231 if (!priority_queue_task_in_queue_p (type, head, task))
1232 gomp_fatal ("Attempt to downgrade missing task %p", task);
1233 #endif
1234 if (priority_queue_multi_p (head))
1236 struct priority_list *list
1237 = priority_queue_lookup_priority (head, task->priority);
1238 priority_list_downgrade_task (type, list, task);
1240 else
1241 priority_list_downgrade_task (type, &head->l, task);
1244 /* Setup CHILD_TASK to execute. This is done by setting the task to
1245 TIED, and updating all relevant queues so that CHILD_TASK is no
1246 longer chosen for scheduling. Also, remove CHILD_TASK from the
1247 overall team task queue entirely.
1249 Return TRUE if task or its containing taskgroup has been
1250 cancelled. */
1252 static inline bool
1253 gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
1254 struct gomp_team *team)
1256 #if _LIBGOMP_CHECKING_
1257 if (child_task->parent)
1258 priority_queue_verify (PQ_CHILDREN,
1259 &child_task->parent->children_queue, true);
1260 if (child_task->taskgroup)
1261 priority_queue_verify (PQ_TASKGROUP,
1262 &child_task->taskgroup->taskgroup_queue, false);
1263 priority_queue_verify (PQ_TEAM, &team->task_queue, false);
1264 #endif
1266 /* Task is about to go tied, move it out of the way. */
1267 if (parent)
1268 priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
1269 child_task);
1271 /* Task is about to go tied, move it out of the way. */
1272 struct gomp_taskgroup *taskgroup = child_task->taskgroup;
1273 if (taskgroup)
1274 priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1275 child_task);
1277 priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
1278 MEMMODEL_RELAXED);
1279 child_task->pnode[PQ_TEAM].next = NULL;
1280 child_task->pnode[PQ_TEAM].prev = NULL;
1281 child_task->kind = GOMP_TASK_TIED;
1283 if (--team->task_queued_count == 0)
1284 gomp_team_barrier_clear_task_pending (&team->barrier);
1285 if (__builtin_expect (gomp_cancel_var, 0)
1286 && !child_task->copy_ctors_done)
1288 if (gomp_team_barrier_cancelled (&team->barrier))
1289 return true;
1290 if (taskgroup)
1292 if (taskgroup->cancelled)
1293 return true;
1294 if (taskgroup->workshare
1295 && taskgroup->prev
1296 && taskgroup->prev->cancelled)
1297 return true;
1300 return false;
1303 static void
1304 gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
1306 struct gomp_task *parent = child_task->parent;
1307 size_t i;
1309 if (parent->depend_all_memory == child_task)
1310 parent->depend_all_memory = NULL;
1311 for (i = 0; i < child_task->depend_count; i++)
1312 if (!child_task->depend[i].redundant)
1314 if (child_task->depend[i].next)
1315 child_task->depend[i].next->prev = child_task->depend[i].prev;
1316 if (child_task->depend[i].prev)
1317 child_task->depend[i].prev->next = child_task->depend[i].next;
1318 else
1320 hash_entry_type *slot
1321 = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
1322 NO_INSERT);
1323 if (*slot != &child_task->depend[i])
1324 abort ();
1325 if (child_task->depend[i].next)
1326 *slot = child_task->depend[i].next;
1327 else
1328 htab_clear_slot (parent->depend_hash, slot);
1333 /* After a CHILD_TASK has been run, adjust the dependency queue for
1334 each task that depends on CHILD_TASK, to record the fact that there
1335 is one less dependency to worry about. If a task that depended on
1336 CHILD_TASK now has no dependencies, place it in the various queues
1337 so it gets scheduled to run.
1339 TEAM is the team to which CHILD_TASK belongs. */
1341 static size_t
1342 gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
1343 struct gomp_team *team)
1345 struct gomp_task *parent = child_task->parent;
1346 size_t i, count = child_task->dependers->n_elem, ret = 0;
1347 for (i = 0; i < count; i++)
1349 struct gomp_task *task = child_task->dependers->elem[i];
1351 /* CHILD_TASK satisfies a dependency for TASK. Keep track of
1352 TASK's remaining dependencies. Once TASK has no other
1353 dependencies, put it into the various queues so it will get
1354 scheduled for execution. */
1355 if (--task->num_dependees != 0)
1356 continue;
1358 struct gomp_taskgroup *taskgroup = task->taskgroup;
1359 if (parent)
1361 priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
1362 task, task->priority,
1363 PRIORITY_INSERT_BEGIN,
1364 /*adjust_parent_depends_on=*/true,
1365 task->parent_depends_on);
1366 if (parent->taskwait)
1368 if (parent->taskwait->in_taskwait)
1370 /* One more task has had its dependencies met.
1371 Inform any waiters. */
1372 parent->taskwait->in_taskwait = false;
1373 gomp_sem_post (&parent->taskwait->taskwait_sem);
1375 else if (parent->taskwait->in_depend_wait)
1377 /* One more task has had its dependencies met.
1378 Inform any waiters. */
1379 parent->taskwait->in_depend_wait = false;
1380 gomp_sem_post (&parent->taskwait->taskwait_sem);
1384 else
1385 task->parent = NULL;
1386 if (taskgroup)
1388 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1389 task, task->priority,
1390 PRIORITY_INSERT_BEGIN,
1391 /*adjust_parent_depends_on=*/false,
1392 task->parent_depends_on);
1393 if (taskgroup->in_taskgroup_wait)
1395 /* One more task has had its dependencies met.
1396 Inform any waiters. */
1397 taskgroup->in_taskgroup_wait = false;
1398 gomp_sem_post (&taskgroup->taskgroup_sem);
1401 priority_queue_insert (PQ_TEAM, &team->task_queue,
1402 task, task->priority,
1403 PRIORITY_INSERT_END,
1404 /*adjust_parent_depends_on=*/false,
1405 task->parent_depends_on);
1406 ++team->task_count;
1407 ++team->task_queued_count;
1408 ++ret;
1410 free (child_task->dependers);
1411 child_task->dependers = NULL;
1412 if (ret > 1)
1413 gomp_team_barrier_set_task_pending (&team->barrier);
1414 return ret;
1417 static inline size_t
1418 gomp_task_run_post_handle_depend (struct gomp_task *child_task,
1419 struct gomp_team *team)
1421 if (child_task->depend_count == 0)
1422 return 0;
1424 /* If parent is gone already, the hash table is freed and nothing
1425 will use the hash table anymore, no need to remove anything from it. */
1426 if (child_task->parent != NULL)
1427 gomp_task_run_post_handle_depend_hash (child_task);
1429 if (child_task->dependers == NULL)
1430 return 0;
1432 return gomp_task_run_post_handle_dependers (child_task, team);
1435 /* Remove CHILD_TASK from its parent. */
1437 static inline void
1438 gomp_task_run_post_remove_parent (struct gomp_task *child_task)
1440 struct gomp_task *parent = child_task->parent;
1441 if (parent == NULL)
1442 return;
1444 /* If this was the last task the parent was depending on,
1445 synchronize with gomp_task_maybe_wait_for_dependencies so it can
1446 clean up and return. */
1447 if (__builtin_expect (child_task->parent_depends_on, 0)
1448 && --parent->taskwait->n_depend == 0
1449 && parent->taskwait->in_depend_wait)
1451 parent->taskwait->in_depend_wait = false;
1452 gomp_sem_post (&parent->taskwait->taskwait_sem);
1455 if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
1456 child_task, MEMMODEL_RELEASE)
1457 && parent->taskwait && parent->taskwait->in_taskwait)
1459 parent->taskwait->in_taskwait = false;
1460 gomp_sem_post (&parent->taskwait->taskwait_sem);
1462 child_task->pnode[PQ_CHILDREN].next = NULL;
1463 child_task->pnode[PQ_CHILDREN].prev = NULL;
1466 /* Remove CHILD_TASK from its taskgroup. */
1468 static inline void
1469 gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
1471 struct gomp_taskgroup *taskgroup = child_task->taskgroup;
1472 if (taskgroup == NULL)
1473 return;
1474 bool empty = priority_queue_remove (PQ_TASKGROUP,
1475 &taskgroup->taskgroup_queue,
1476 child_task, MEMMODEL_RELAXED);
1477 child_task->pnode[PQ_TASKGROUP].next = NULL;
1478 child_task->pnode[PQ_TASKGROUP].prev = NULL;
1479 if (taskgroup->num_children > 1)
1480 --taskgroup->num_children;
1481 else
1483 /* We access taskgroup->num_children in GOMP_taskgroup_end
1484 outside of the task lock mutex region, so we
1485 need a release barrier here to ensure memory
1486 written by child_task->fn above is flushed
1487 before the NULL is written. */
1488 __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
1490 if (empty && taskgroup->in_taskgroup_wait)
1492 taskgroup->in_taskgroup_wait = false;
1493 gomp_sem_post (&taskgroup->taskgroup_sem);
1497 void
1498 gomp_barrier_handle_tasks (gomp_barrier_state_t state)
1500 struct gomp_thread *thr = gomp_thread ();
1501 struct gomp_team *team = thr->ts.team;
1502 struct gomp_task *task = thr->task;
1503 struct gomp_task *child_task = NULL;
1504 struct gomp_task *to_free = NULL;
1505 int do_wake = 0;
1507 gomp_mutex_lock (&team->task_lock);
1508 if (gomp_barrier_last_thread (state))
1510 if (team->task_count == 0)
1512 gomp_team_barrier_done (&team->barrier, state);
1513 gomp_mutex_unlock (&team->task_lock);
1514 gomp_team_barrier_wake (&team->barrier, 0);
1515 return;
1517 gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
1520 while (1)
1522 bool cancelled = false;
1524 if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
1526 bool ignored;
1527 child_task
1528 = priority_queue_next_task (PQ_TEAM, &team->task_queue,
1529 PQ_IGNORED, NULL,
1530 &ignored);
1531 cancelled = gomp_task_run_pre (child_task, child_task->parent,
1532 team);
1533 if (__builtin_expect (cancelled, 0))
1535 if (to_free)
1537 gomp_finish_task (to_free);
1538 free (to_free);
1539 to_free = NULL;
1541 goto finish_cancelled;
1543 team->task_running_count++;
1544 child_task->in_tied_task = true;
1546 else if (team->task_count == 0
1547 && gomp_team_barrier_waiting_for_tasks (&team->barrier))
1549 gomp_team_barrier_done (&team->barrier, state);
1550 gomp_mutex_unlock (&team->task_lock);
1551 gomp_team_barrier_wake (&team->barrier, 0);
1552 if (to_free)
1554 gomp_finish_task (to_free);
1555 free (to_free);
1557 return;
1559 gomp_mutex_unlock (&team->task_lock);
1560 if (do_wake)
1562 gomp_team_barrier_wake (&team->barrier, do_wake);
1563 do_wake = 0;
1565 if (to_free)
1567 gomp_finish_task (to_free);
1568 free (to_free);
1569 to_free = NULL;
1571 if (child_task)
1573 thr->task = child_task;
1574 if (__builtin_expect (child_task->fn == NULL, 0))
1576 if (gomp_target_task_fn (child_task->fn_data))
1578 thr->task = task;
1579 gomp_mutex_lock (&team->task_lock);
1580 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1581 team->task_running_count--;
1582 struct gomp_target_task *ttask
1583 = (struct gomp_target_task *) child_task->fn_data;
1584 /* If GOMP_PLUGIN_target_task_completion has run already
1585 in between gomp_target_task_fn and the mutex lock,
1586 perform the requeuing here. */
1587 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1588 gomp_target_task_completion (team, child_task);
1589 else
1590 ttask->state = GOMP_TARGET_TASK_RUNNING;
1591 child_task = NULL;
1592 continue;
1595 else
1596 child_task->fn (child_task->fn_data);
1597 thr->task = task;
1599 else
1600 return;
1601 gomp_mutex_lock (&team->task_lock);
1602 if (child_task)
1604 if (child_task->detach_team)
1606 assert (child_task->detach_team == team);
1607 child_task->kind = GOMP_TASK_DETACHED;
1608 ++team->task_detach_count;
1609 --team->task_running_count;
1610 gomp_debug (0,
1611 "thread %d: task with event %p finished without "
1612 "completion event fulfilled in team barrier\n",
1613 thr->ts.team_id, child_task);
1614 child_task = NULL;
1615 continue;
1618 finish_cancelled:;
1619 size_t new_tasks
1620 = gomp_task_run_post_handle_depend (child_task, team);
1621 gomp_task_run_post_remove_parent (child_task);
1622 gomp_clear_parent (&child_task->children_queue);
1623 gomp_task_run_post_remove_taskgroup (child_task);
1624 to_free = child_task;
1625 if (!cancelled)
1626 team->task_running_count--;
1627 child_task = NULL;
1628 if (new_tasks > 1)
1630 do_wake = team->nthreads - team->task_running_count;
1631 if (do_wake > new_tasks)
1632 do_wake = new_tasks;
1634 --team->task_count;
1639 /* Called when encountering a taskwait directive.
1641 Wait for all children of the current task. */
1643 void
1644 GOMP_taskwait (void)
1646 struct gomp_thread *thr = gomp_thread ();
1647 struct gomp_team *team = thr->ts.team;
1648 struct gomp_task *task = thr->task;
1649 struct gomp_task *child_task = NULL;
1650 struct gomp_task *to_free = NULL;
1651 struct gomp_taskwait taskwait;
1652 int do_wake = 0;
1654 /* The acquire barrier on load of task->children here synchronizes
1655 with the write of a NULL in gomp_task_run_post_remove_parent. It is
1656 not necessary that we synchronize with other non-NULL writes at
1657 this point, but we must ensure that all writes to memory by a
1658 child thread task work function are seen before we exit from
1659 GOMP_taskwait. */
1660 if (task == NULL
1661 || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
1662 return;
1664 memset (&taskwait, 0, sizeof (taskwait));
1665 bool child_q = false;
1666 gomp_mutex_lock (&team->task_lock);
1667 while (1)
1669 bool cancelled = false;
1670 if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
1672 bool destroy_taskwait = task->taskwait != NULL;
1673 task->taskwait = NULL;
1674 gomp_mutex_unlock (&team->task_lock);
1675 if (to_free)
1677 gomp_finish_task (to_free);
1678 free (to_free);
1680 if (destroy_taskwait)
1681 gomp_sem_destroy (&taskwait.taskwait_sem);
1682 return;
1684 struct gomp_task *next_task
1685 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1686 PQ_TEAM, &team->task_queue, &child_q);
1687 if (next_task->kind == GOMP_TASK_WAITING)
1689 child_task = next_task;
1690 cancelled
1691 = gomp_task_run_pre (child_task, task, team);
1692 if (__builtin_expect (cancelled, 0))
1694 if (to_free)
1696 gomp_finish_task (to_free);
1697 free (to_free);
1698 to_free = NULL;
1700 goto finish_cancelled;
1703 else
1705 /* All tasks we are waiting for are either running in other
1706 threads, are detached and waiting for the completion event to be
1707 fulfilled, or they are tasks that have not had their
1708 dependencies met (so they're not even in the queue). Wait
1709 for them. */
1710 if (task->taskwait == NULL)
1712 taskwait.in_depend_wait = false;
1713 gomp_sem_init (&taskwait.taskwait_sem, 0);
1714 task->taskwait = &taskwait;
1716 taskwait.in_taskwait = true;
1718 gomp_mutex_unlock (&team->task_lock);
1719 if (do_wake)
1721 gomp_team_barrier_wake (&team->barrier, do_wake);
1722 do_wake = 0;
1724 if (to_free)
1726 gomp_finish_task (to_free);
1727 free (to_free);
1728 to_free = NULL;
1730 if (child_task)
1732 thr->task = child_task;
1733 if (__builtin_expect (child_task->fn == NULL, 0))
1735 if (gomp_target_task_fn (child_task->fn_data))
1737 thr->task = task;
1738 gomp_mutex_lock (&team->task_lock);
1739 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1740 struct gomp_target_task *ttask
1741 = (struct gomp_target_task *) child_task->fn_data;
1742 /* If GOMP_PLUGIN_target_task_completion has run already
1743 in between gomp_target_task_fn and the mutex lock,
1744 perform the requeuing here. */
1745 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1746 gomp_target_task_completion (team, child_task);
1747 else
1748 ttask->state = GOMP_TARGET_TASK_RUNNING;
1749 child_task = NULL;
1750 continue;
1753 else
1754 child_task->fn (child_task->fn_data);
1755 thr->task = task;
1757 else
1758 gomp_sem_wait (&taskwait.taskwait_sem);
1759 gomp_mutex_lock (&team->task_lock);
1760 if (child_task)
1762 if (child_task->detach_team)
1764 assert (child_task->detach_team == team);
1765 child_task->kind = GOMP_TASK_DETACHED;
1766 ++team->task_detach_count;
1767 gomp_debug (0,
1768 "thread %d: task with event %p finished without "
1769 "completion event fulfilled in taskwait\n",
1770 thr->ts.team_id, child_task);
1771 child_task = NULL;
1772 continue;
1775 finish_cancelled:;
1776 size_t new_tasks
1777 = gomp_task_run_post_handle_depend (child_task, team);
1779 if (child_q)
1781 priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1782 child_task, MEMMODEL_RELAXED);
1783 child_task->pnode[PQ_CHILDREN].next = NULL;
1784 child_task->pnode[PQ_CHILDREN].prev = NULL;
1787 gomp_clear_parent (&child_task->children_queue);
1789 gomp_task_run_post_remove_taskgroup (child_task);
1791 to_free = child_task;
1792 child_task = NULL;
1793 team->task_count--;
1794 if (new_tasks > 1)
1796 do_wake = team->nthreads - team->task_running_count
1797 - !task->in_tied_task;
1798 if (do_wake > new_tasks)
1799 do_wake = new_tasks;
1805 /* Called when encountering a taskwait directive with depend clause(s).
1806 Wait as if it were a mergeable included task construct with an empty body. */
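/* For example (user-level sketch):

     #pragma omp taskwait depend(in: x)

   waits only for those previously generated sibling tasks whose depend
   clauses conflict with depend(in: x) (e.g. an earlier out/inout on x),
   rather than for all child tasks.  */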
1808 void
1809 GOMP_taskwait_depend (void **depend)
1811 struct gomp_thread *thr = gomp_thread ();
1812 struct gomp_team *team = thr->ts.team;
1814 /* If parallel or taskgroup has been cancelled, return early. */
1815 if (__builtin_expect (gomp_cancel_var, 0) && team)
1817 if (gomp_team_barrier_cancelled (&team->barrier))
1818 return;
1819 if (thr->task->taskgroup)
1821 if (thr->task->taskgroup->cancelled)
1822 return;
1823 if (thr->task->taskgroup->workshare
1824 && thr->task->taskgroup->prev
1825 && thr->task->taskgroup->prev->cancelled)
1826 return;
1830 if (thr->task && thr->task->depend_hash)
1831 gomp_task_maybe_wait_for_dependencies (depend);
1834 /* An undeferred task is about to run. Wait for all tasks that this
1835 undeferred task depends on.
1837 This is done by first putting all known ready dependencies
1838 (dependencies that have their own dependencies met) at the top of
1839 the scheduling queues. Then we iterate through these imminently
1840 ready tasks (and possibly other high priority tasks), and run them.
1841 If we run out of ready dependencies to execute, we either wait for
1842 the remaining dependencies to finish, or wait for them to get
1843 scheduled so we can run them.
1845 DEPEND is as in GOMP_task. */
1847 void
1848 gomp_task_maybe_wait_for_dependencies (void **depend)
1850 struct gomp_thread *thr = gomp_thread ();
1851 struct gomp_task *task = thr->task;
1852 struct gomp_team *team = thr->ts.team;
1853 struct gomp_task_depend_entry elem, *ent = NULL;
1854 struct gomp_taskwait taskwait;
1855 size_t orig_ndepend = (uintptr_t) depend[0];
1856 size_t nout = (uintptr_t) depend[1];
1857 size_t ndepend = orig_ndepend;
1858 size_t normal = ndepend;
1859 size_t n = 2;
1860 size_t i;
1861 size_t num_awaited = 0;
1862 struct gomp_task *child_task = NULL;
1863 struct gomp_task *to_free = NULL;
1864 int do_wake = 0;
1866 if (ndepend == 0)
1868 ndepend = nout;
1869 nout = (uintptr_t) depend[2] + (uintptr_t) depend[3];
1870 normal = nout + (uintptr_t) depend[4];
1871 n = 5;
1873 gomp_mutex_lock (&team->task_lock);
1874 if (__builtin_expect (task->depend_all_memory && ndepend, false))
1876 struct gomp_task *tsk = task->depend_all_memory;
1877 if (!tsk->parent_depends_on)
1879 tsk->parent_depends_on = true;
1880 ++num_awaited;
1881 if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
1882 priority_queue_upgrade_task (tsk, task);
1885 for (i = 0; i < ndepend; i++)
1887 elem.addr = depend[i + n];
1888 elem.is_in = i >= nout;
1889 if (__builtin_expect (i >= normal, 0))
1891 void **d = (void **) elem.addr;
1892 switch ((uintptr_t) d[1])
1894 case GOMP_DEPEND_IN:
1895 break;
1896 case GOMP_DEPEND_OUT:
1897 case GOMP_DEPEND_INOUT:
1898 case GOMP_DEPEND_MUTEXINOUTSET:
1899 elem.is_in = 0;
1900 break;
1901 case GOMP_DEPEND_INOUTSET:
1902 elem.is_in = 2;
1903 break;
1904 default:
1905 gomp_fatal ("unknown omp_depend_t dependence type %d",
1906 (int) (uintptr_t) d[1]);
1908 elem.addr = d[0];
1910 if (__builtin_expect (elem.addr == NULL && !elem.is_in, false))
1912 size_t size = htab_size (task->depend_hash);
1913 if (htab_elements (task->depend_hash) * 8 < size && size > 32)
1914 htab_expand (task->depend_hash);
1916 /* depend(inout: omp_all_memory) - depend on all previous
1917 sibling tasks that do have dependencies. Inlined
1918 htab_traverse. */
1919 hash_entry_type *slot = &task->depend_hash->entries[0];
1920 hash_entry_type *end = slot + htab_size (task->depend_hash);
1921 for (; slot != end; ++slot)
1923 if (*slot == HTAB_EMPTY_ENTRY || *slot == HTAB_DELETED_ENTRY)
1924 continue;
1925 for (ent = *slot; ent; ent = ent->next)
1927 struct gomp_task *tsk = ent->task;
1928 if (!tsk->parent_depends_on)
1930 tsk->parent_depends_on = true;
1931 ++num_awaited;
1932 if (tsk->num_dependees == 0
1933 && tsk->kind == GOMP_TASK_WAITING)
1934 priority_queue_upgrade_task (tsk, task);
1938 break;
1940 ent = htab_find (task->depend_hash, &elem);
1941 for (; ent; ent = ent->next)
1942 if (elem.is_in && elem.is_in == ent->is_in)
1943 continue;
1944 else
1946 struct gomp_task *tsk = ent->task;
1947 if (!tsk->parent_depends_on)
1949 tsk->parent_depends_on = true;
1950 ++num_awaited;
1951 /* If dependency TSK itself has no dependencies and is
1952 ready to run, move it up front so that we run it as
1953 soon as possible. */
1954 if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
1955 priority_queue_upgrade_task (tsk, task);
1959 if (num_awaited == 0)
1961 gomp_mutex_unlock (&team->task_lock);
1962 return;
1965 memset (&taskwait, 0, sizeof (taskwait));
1966 taskwait.n_depend = num_awaited;
1967 gomp_sem_init (&taskwait.taskwait_sem, 0);
1968 task->taskwait = &taskwait;
1970 while (1)
1972 bool cancelled = false;
1973 if (taskwait.n_depend == 0)
1975 task->taskwait = NULL;
1976 gomp_mutex_unlock (&team->task_lock);
1977 if (to_free)
1979 gomp_finish_task (to_free);
1980 free (to_free);
1982 gomp_sem_destroy (&taskwait.taskwait_sem);
1983 return;
1986 /* Theoretically, when we have multiple priorities, we should
1987 choose between the highest-priority item in
1988 task->children_queue and team->task_queue here, so we should
1989 use priority_queue_next_task().  However, since we are
1990 running an undeferred task, perhaps that makes all tasks it
1991 depends on undeferred, thus a priority of INF?  That would
1992 make it unnecessary to take anything but the dependencies
1993 into account here.
1995 On the other hand, if we want to use priority_queue_next_task(),
1996 care should be taken to only use priority_queue_remove()
1997 below if the task was actually removed from the children
1998 queue. */
1999 bool ignored;
2000 struct gomp_task *next_task
2001 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
2002 PQ_IGNORED, NULL, &ignored);
2004 if (next_task->kind == GOMP_TASK_WAITING)
2006 child_task = next_task;
2007 cancelled
2008 = gomp_task_run_pre (child_task, task, team);
2009 if (__builtin_expect (cancelled, 0))
2011 if (to_free)
2013 gomp_finish_task (to_free);
2014 free (to_free);
2015 to_free = NULL;
2017 goto finish_cancelled;
2020 else
2021 /* All tasks we are waiting for are either running in other
2022 threads, or they are tasks that have not had their
2023 dependencies met (so they're not even in the queue). Wait
2024 for them. */
2025 taskwait.in_depend_wait = true;
2026 gomp_mutex_unlock (&team->task_lock);
2027 if (do_wake)
2029 gomp_team_barrier_wake (&team->barrier, do_wake);
2030 do_wake = 0;
2032 if (to_free)
2034 gomp_finish_task (to_free);
2035 free (to_free);
2036 to_free = NULL;
2038 if (child_task)
2040 thr->task = child_task;
2041 if (__builtin_expect (child_task->fn == NULL, 0))
2043 if (gomp_target_task_fn (child_task->fn_data))
2045 thr->task = task;
2046 gomp_mutex_lock (&team->task_lock);
2047 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
2048 struct gomp_target_task *ttask
2049 = (struct gomp_target_task *) child_task->fn_data;
2050 /* If GOMP_PLUGIN_target_task_completion has run already
2051 in between gomp_target_task_fn and the mutex lock,
2052 perform the requeuing here. */
2053 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2054 gomp_target_task_completion (team, child_task);
2055 else
2056 ttask->state = GOMP_TARGET_TASK_RUNNING;
2057 child_task = NULL;
2058 continue;
2061 else
2062 child_task->fn (child_task->fn_data);
2063 thr->task = task;
2065 else
2066 gomp_sem_wait (&taskwait.taskwait_sem);
2067 gomp_mutex_lock (&team->task_lock);
2068 if (child_task)
2070 finish_cancelled:;
2071 size_t new_tasks
2072 = gomp_task_run_post_handle_depend (child_task, team);
2073 if (child_task->parent_depends_on)
2074 --taskwait.n_depend;
2076 priority_queue_remove (PQ_CHILDREN, &task->children_queue,
2077 child_task, MEMMODEL_RELAXED);
2078 child_task->pnode[PQ_CHILDREN].next = NULL;
2079 child_task->pnode[PQ_CHILDREN].prev = NULL;
2081 gomp_clear_parent (&child_task->children_queue);
2082 gomp_task_run_post_remove_taskgroup (child_task);
2083 to_free = child_task;
2084 child_task = NULL;
2085 team->task_count--;
2086 if (new_tasks > 1)
2088 do_wake = team->nthreads - team->task_running_count
2089 - !task->in_tied_task;
2090 if (do_wake > new_tasks)
2091 do_wake = new_tasks;
2097 /* Called when encountering a taskyield directive. */
2099 void
2100 GOMP_taskyield (void)
2102 /* Nothing at the moment. */
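/* Editorial note (not part of the original source): a user-level

       #pragma omp taskyield

   therefore currently has no effect in libgomp; task switching happens
   only at the other scheduling points implemented in this file, such as
   task creation, taskwait and barriers.  */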
2105 static inline struct gomp_taskgroup *
2106 gomp_taskgroup_init (struct gomp_taskgroup *prev)
2108 struct gomp_taskgroup *taskgroup
2109 = gomp_malloc (sizeof (struct gomp_taskgroup));
2110 taskgroup->prev = prev;
2111 priority_queue_init (&taskgroup->taskgroup_queue);
2112 taskgroup->reductions = prev ? prev->reductions : NULL;
2113 taskgroup->in_taskgroup_wait = false;
2114 taskgroup->cancelled = false;
2115 taskgroup->workshare = false;
2116 taskgroup->num_children = 0;
2117 gomp_sem_init (&taskgroup->taskgroup_sem, 0);
2118 return taskgroup;
2121 void
2122 GOMP_taskgroup_start (void)
2124 struct gomp_thread *thr = gomp_thread ();
2125 struct gomp_team *team = thr->ts.team;
2126 struct gomp_task *task = thr->task;
2128 /* If team is NULL, all tasks are executed as
2129 GOMP_TASK_UNDEFERRED tasks and thus all children tasks of
2130 taskgroup and their descendant tasks will be finished
2131 by the time GOMP_taskgroup_end is called. */
2132 if (team == NULL)
2133 return;
2134 task->taskgroup = gomp_taskgroup_init (task->taskgroup);
2137 void
2138 GOMP_taskgroup_end (void)
2140 struct gomp_thread *thr = gomp_thread ();
2141 struct gomp_team *team = thr->ts.team;
2142 struct gomp_task *task = thr->task;
2143 struct gomp_taskgroup *taskgroup;
2144 struct gomp_task *child_task = NULL;
2145 struct gomp_task *to_free = NULL;
2146 int do_wake = 0;
2148 if (team == NULL)
2149 return;
2150 taskgroup = task->taskgroup;
2151 if (__builtin_expect (taskgroup == NULL, 0)
2152 && thr->ts.level == 0)
2154 /* This can happen if GOMP_taskgroup_start is called when
2155 thr->ts.team == NULL, but inside of the taskgroup there
2156 is #pragma omp target nowait that creates an implicit
2157 team with a single thread. In this case, we want to wait
2158 for all outstanding tasks in this team. */
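/* Editorial sketch of that corner case (offload_work is a hypothetical
   placeholder, not from the original source):

       #pragma omp taskgroup
       {
       #pragma omp target nowait
         offload_work ();
       }

   encountered outside any parallel region starts the taskgroup while
   thr->ts.team == NULL; the target nowait then creates a single-thread
   implicit team, and this branch waits on that team's barrier.  */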
2159 gomp_team_barrier_wait (&team->barrier);
2160 return;
2163 /* The acquire barrier on load of taskgroup->num_children here
2164 synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
2165 It is not necessary that we synchronize with other non-0 writes at
2166 this point, but we must ensure that all writes to memory by a
2167 child thread task work function are seen before we exit from
2168 GOMP_taskgroup_end. */
2169 if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
2170 goto finish;
2172 bool unused;
2173 gomp_mutex_lock (&team->task_lock);
2174 while (1)
2176 bool cancelled = false;
2177 if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
2178 MEMMODEL_RELAXED))
2180 if (taskgroup->num_children)
2182 if (priority_queue_empty_p (&task->children_queue,
2183 MEMMODEL_RELAXED))
2184 goto do_wait;
2185 child_task
2186 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
2187 PQ_TEAM, &team->task_queue,
2188 &unused);
2190 else
2192 gomp_mutex_unlock (&team->task_lock);
2193 if (to_free)
2195 gomp_finish_task (to_free);
2196 free (to_free);
2198 goto finish;
2201 else
2202 child_task
2203 = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
2204 PQ_TEAM, &team->task_queue, &unused);
2205 if (child_task->kind == GOMP_TASK_WAITING)
2207 cancelled
2208 = gomp_task_run_pre (child_task, child_task->parent, team);
2209 if (__builtin_expect (cancelled, 0))
2211 if (to_free)
2213 gomp_finish_task (to_free);
2214 free (to_free);
2215 to_free = NULL;
2217 goto finish_cancelled;
2220 else
2222 child_task = NULL;
2223 do_wait:
2224 /* All tasks we are waiting for are either running in other
2225 threads, or they are tasks that have not had their
2226 dependencies met (so they're not even in the queue). Wait
2227 for them. */
2228 taskgroup->in_taskgroup_wait = true;
2230 gomp_mutex_unlock (&team->task_lock);
2231 if (do_wake)
2233 gomp_team_barrier_wake (&team->barrier, do_wake);
2234 do_wake = 0;
2236 if (to_free)
2238 gomp_finish_task (to_free);
2239 free (to_free);
2240 to_free = NULL;
2242 if (child_task)
2244 thr->task = child_task;
2245 if (__builtin_expect (child_task->fn == NULL, 0))
2247 if (gomp_target_task_fn (child_task->fn_data))
2249 thr->task = task;
2250 gomp_mutex_lock (&team->task_lock);
2251 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
2252 struct gomp_target_task *ttask
2253 = (struct gomp_target_task *) child_task->fn_data;
2254 /* If GOMP_PLUGIN_target_task_completion has run already
2255 in between gomp_target_task_fn and the mutex lock,
2256 perform the requeuing here. */
2257 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2258 gomp_target_task_completion (team, child_task);
2259 else
2260 ttask->state = GOMP_TARGET_TASK_RUNNING;
2261 child_task = NULL;
2262 continue;
2265 else
2266 child_task->fn (child_task->fn_data);
2267 thr->task = task;
2269 else
2270 gomp_sem_wait (&taskgroup->taskgroup_sem);
2271 gomp_mutex_lock (&team->task_lock);
2272 if (child_task)
2274 if (child_task->detach_team)
2276 assert (child_task->detach_team == team);
2277 child_task->kind = GOMP_TASK_DETACHED;
2278 ++team->task_detach_count;
2279 gomp_debug (0,
2280 "thread %d: task with event %p finished without "
2281 "completion event fulfilled in taskgroup\n",
2282 thr->ts.team_id, child_task);
2283 child_task = NULL;
2284 continue;
2287 finish_cancelled:;
2288 size_t new_tasks
2289 = gomp_task_run_post_handle_depend (child_task, team);
2290 gomp_task_run_post_remove_parent (child_task);
2291 gomp_clear_parent (&child_task->children_queue);
2292 gomp_task_run_post_remove_taskgroup (child_task);
2293 to_free = child_task;
2294 child_task = NULL;
2295 team->task_count--;
2296 if (new_tasks > 1)
2298 do_wake = team->nthreads - team->task_running_count
2299 - !task->in_tied_task;
2300 if (do_wake > new_tasks)
2301 do_wake = new_tasks;
2306 finish:
2307 task->taskgroup = taskgroup->prev;
2308 gomp_sem_destroy (&taskgroup->taskgroup_sem);
2309 free (taskgroup);
2312 static inline __attribute__((always_inline)) void
2313 gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig,
2314 unsigned nthreads)
2316 size_t total_cnt = 0;
2317 uintptr_t *d = data;
2318 struct htab *old_htab = NULL, *new_htab;
2321 if (__builtin_expect (orig != NULL, 0))
2323 /* For worksharing task reductions, memory has been allocated
2324 already by some other thread that encountered the construct
2325 earlier. */
2326 d[2] = orig[2];
2327 d[6] = orig[6];
2328 orig = (uintptr_t *) orig[4];
2330 else
2332 size_t sz = d[1] * nthreads;
2333 /* Should use omp_alloc if d[3] is not -1. */
2334 void *ptr = gomp_aligned_alloc (d[2], sz);
2335 memset (ptr, '\0', sz);
2336 d[2] = (uintptr_t) ptr;
2337 d[6] = d[2] + sz;
2339 d[5] = 0;
2340 total_cnt += d[0];
2341 if (d[4] == 0)
2343 d[4] = (uintptr_t) old;
2344 break;
2346 else
2347 d = (uintptr_t *) d[4];
2349 while (1);
2350 if (old && old[5])
2352 old_htab = (struct htab *) old[5];
2353 total_cnt += htab_elements (old_htab);
2355 new_htab = htab_create (total_cnt);
2356 if (old_htab)
2358 /* Copy old hash table, like in htab_expand. */
2359 hash_entry_type *p, *olimit;
2360 new_htab->n_elements = htab_elements (old_htab);
2361 olimit = old_htab->entries + old_htab->size;
2362 p = old_htab->entries;
2365 hash_entry_type x = *p;
2366 if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
2367 *find_empty_slot_for_expand (new_htab, htab_hash (x)) = x;
2368 p++;
2370 while (p < olimit);
2372 d = data;
2375 size_t j;
2376 for (j = 0; j < d[0]; ++j)
2378 uintptr_t *p = d + 7 + j * 3;
2379 p[2] = (uintptr_t) d;
2380 /* Ugly hack: hash_entry_type is defined for the task dependencies,
2381 which hash on their first element, a pointer.  Here we likewise
2382 need to hash on the first sizeof (uintptr_t) bytes, which contain
2383 a pointer.  Hide the cast from the compiler.  */
2384 hash_entry_type n;
2385 __asm ("" : "=g" (n) : "0" (p));
2386 *htab_find_slot (&new_htab, n, INSERT) = n;
2388 if (d[4] == (uintptr_t) old)
2389 break;
2390 else
2391 d = (uintptr_t *) d[4];
2393 while (1);
2394 d[5] = (uintptr_t) new_htab;
2397 static void
2398 gomp_create_artificial_team (void)
2400 struct gomp_thread *thr = gomp_thread ();
2401 struct gomp_task_icv *icv;
2402 struct gomp_team *team = gomp_new_team (1);
2403 struct gomp_task *task = thr->task;
2404 icv = task ? &task->icv : &gomp_global_icv;
2405 team->prev_ts = thr->ts;
2406 thr->ts.team = team;
2407 thr->ts.team_id = 0;
2408 thr->ts.work_share = &team->work_shares[0];
2409 thr->ts.last_work_share = NULL;
2410 #ifdef HAVE_SYNC_BUILTINS
2411 thr->ts.single_count = 0;
2412 #endif
2413 thr->ts.static_trip = 0;
2414 thr->task = &team->implicit_task[0];
2415 gomp_init_task (thr->task, NULL, icv);
2416 if (task)
2418 thr->task = task;
2419 gomp_end_task ();
2420 free (task);
2421 thr->task = &team->implicit_task[0];
2423 #ifdef LIBGOMP_USE_PTHREADS
2424 else
2425 pthread_setspecific (gomp_thread_destructor, thr);
2426 #endif
2429 /* The format of data is:
2430 data[0] cnt
2431 data[1] size
2432 data[2] alignment (on output array pointer)
2433 data[3] allocator (-1 if malloc allocator)
2434 data[4] next pointer
2435 data[5] used internally (htab pointer)
2436 data[6] used internally (end of array)
2437 cnt times
2438 ent[0] address
2439 ent[1] offset
2440 ent[2] used internally (pointer to data[0])
2441 The entries are sorted by increasing offset, so that a binary
2442 search can be performed. Normally, data[8] is 0, exception is
2443 for worksharing construct task reductions in cancellable parallel,
2444 where at offset 0 there should be space for a pointer and an integer
2445 which are used internally. */
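/* Editorial sketch (derived from GOMP_task_reduction_remap below, not a
   normative description): given such a descriptor DATA and the j-th
   entry ENT = DATA + 7 + 3 * j, the private copy of that reduction
   variable for the thread with team id ID lives at

       void *priv = (void *) (data[2] + id * data[1] + ent[1]);

   i.e. the base of the per-team allocation, plus one chunk of data[1]
   bytes per thread, plus the entry's offset within that chunk.  */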
2447 void
2448 GOMP_taskgroup_reduction_register (uintptr_t *data)
2450 struct gomp_thread *thr = gomp_thread ();
2451 struct gomp_team *team = thr->ts.team;
2452 struct gomp_task *task;
2453 unsigned nthreads;
2454 if (__builtin_expect (team == NULL, 0))
2456 /* The task reduction code needs a team and task, so for
2457 orphaned taskgroups just create the implicit team. */
2458 gomp_create_artificial_team ();
2459 ialias_call (GOMP_taskgroup_start) ();
2460 team = thr->ts.team;
2462 nthreads = team->nthreads;
2463 task = thr->task;
2464 gomp_reduction_register (data, task->taskgroup->reductions, NULL, nthreads);
2465 task->taskgroup->reductions = data;
2468 void
2469 GOMP_taskgroup_reduction_unregister (uintptr_t *data)
2471 uintptr_t *d = data;
2472 htab_free ((struct htab *) data[5]);
2475 gomp_aligned_free ((void *) d[2]);
2476 d = (uintptr_t *) d[4];
2478 while (d && !d[5]);
2480 ialias (GOMP_taskgroup_reduction_unregister)
2482 /* For i = 0 to cnt-1, remap ptrs[i], which holds either the address of
2483 the original list item or the address of a previously remapped
2484 original list item, to the address of the private copy, and store
2485 that back into ptrs[i].  For i < cntorig, additionally set
2486 ptrs[cnt+i] to the address of the original list item. */
2488 void
2489 GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs)
2491 struct gomp_thread *thr = gomp_thread ();
2492 struct gomp_task *task = thr->task;
2493 unsigned id = thr->ts.team_id;
2494 uintptr_t *data = task->taskgroup->reductions;
2495 uintptr_t *d;
2496 struct htab *reduction_htab = (struct htab *) data[5];
2497 size_t i;
2498 for (i = 0; i < cnt; ++i)
2500 hash_entry_type ent, n;
2501 __asm ("" : "=g" (ent) : "0" (ptrs + i));
2502 n = htab_find (reduction_htab, ent);
2503 if (n)
2505 uintptr_t *p;
2506 __asm ("" : "=g" (p) : "0" (n));
2507 /* At this point, p[0] should be equal to (uintptr_t) ptrs[i],
2508 p[1] is the offset within the allocated chunk for each
2509 thread, p[2] is the array registered with
2510 GOMP_taskgroup_reduction_register, d[2] is the base of the
2511 allocated memory and d[1] is the size of the allocated chunk
2512 for one thread. */
2513 d = (uintptr_t *) p[2];
2514 ptrs[i] = (void *) (d[2] + id * d[1] + p[1]);
2515 if (__builtin_expect (i < cntorig, 0))
2516 ptrs[cnt + i] = (void *) p[0];
2517 continue;
2519 d = data;
2520 while (d != NULL)
2522 if ((uintptr_t) ptrs[i] >= d[2] && (uintptr_t) ptrs[i] < d[6])
2523 break;
2524 d = (uintptr_t *) d[4];
2526 if (d == NULL)
2527 gomp_fatal ("couldn't find matching task_reduction or reduction with "
2528 "task modifier for %p", ptrs[i]);
2529 uintptr_t off = ((uintptr_t) ptrs[i] - d[2]) % d[1];
2530 ptrs[i] = (void *) (d[2] + id * d[1] + off);
2531 if (__builtin_expect (i < cntorig, 0))
2533 size_t lo = 0, hi = d[0] - 1;
2534 while (lo <= hi)
2536 size_t m = (lo + hi) / 2;
2537 if (d[7 + 3 * m + 1] < off)
2538 lo = m + 1;
2539 else if (d[7 + 3 * m + 1] == off)
2541 ptrs[cnt + i] = (void *) d[7 + 3 * m];
2542 break;
2544 else
2545 hi = m - 1;
2547 if (lo > hi)
2548 gomp_fatal ("couldn't find matching task_reduction or reduction "
2549 "with task modifier for %p", ptrs[i]);
2554 struct gomp_taskgroup *
2555 gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads)
2557 struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL);
2558 gomp_reduction_register (data, NULL, NULL, nthreads);
2559 taskgroup->reductions = data;
2560 return taskgroup;
2563 void
2564 gomp_workshare_task_reduction_register (uintptr_t *data, uintptr_t *orig)
2566 struct gomp_thread *thr = gomp_thread ();
2567 struct gomp_team *team = thr->ts.team;
2568 struct gomp_task *task = thr->task;
2569 unsigned nthreads = team->nthreads;
2570 gomp_reduction_register (data, task->taskgroup->reductions, orig, nthreads);
2571 task->taskgroup->reductions = data;
2574 void
2575 gomp_workshare_taskgroup_start (void)
2577 struct gomp_thread *thr = gomp_thread ();
2578 struct gomp_team *team = thr->ts.team;
2579 struct gomp_task *task;
2581 if (team == NULL)
2583 gomp_create_artificial_team ();
2584 team = thr->ts.team;
2586 task = thr->task;
2587 task->taskgroup = gomp_taskgroup_init (task->taskgroup);
2588 task->taskgroup->workshare = true;
2591 void
2592 GOMP_workshare_task_reduction_unregister (bool cancelled)
2594 struct gomp_thread *thr = gomp_thread ();
2595 struct gomp_task *task = thr->task;
2596 struct gomp_team *team = thr->ts.team;
2597 uintptr_t *data = task->taskgroup->reductions;
2598 ialias_call (GOMP_taskgroup_end) ();
2599 if (thr->ts.team_id == 0)
2600 ialias_call (GOMP_taskgroup_reduction_unregister) (data);
2601 else
2602 htab_free ((struct htab *) data[5]);
2604 if (!cancelled)
2605 gomp_team_barrier_wait (&team->barrier);
2608 int
2609 omp_in_final (void)
2611 struct gomp_thread *thr = gomp_thread ();
2612 return thr->task && thr->task->final_task;
2615 ialias (omp_in_final)
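/* Editorial usage sketch for omp_in_final (solve_serially,
   solve_with_tasks and depth are hypothetical names, not from the
   original source):

       #pragma omp task final(depth > 8)
       {
         if (omp_in_final ())
           solve_serially ();
         else
           solve_with_tasks (depth);
       }

   Inside a final task region omp_in_final returns nonzero, so the task
   body can fall back to a serial algorithm instead of creating further
   tasks, which would all run undeferred anyway.  */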
2617 void
2618 omp_fulfill_event (omp_event_handle_t event)
2620 struct gomp_task *task = (struct gomp_task *) event;
2621 if (!task->deferred_p)
2623 if (gomp_sem_getcount (task->completion_sem) > 0)
2624 gomp_fatal ("omp_fulfill_event: %p event already fulfilled!\n", task);
2626 gomp_debug (0, "omp_fulfill_event: %p event for undeferred task\n",
2627 task);
2628 gomp_sem_post (task->completion_sem);
2629 return;
2632 struct gomp_team *team = __atomic_load_n (&task->detach_team,
2633 MEMMODEL_RELAXED);
2634 if (!team)
2635 gomp_fatal ("omp_fulfill_event: %p event is invalid or has already "
2636 "been fulfilled!\n", task);
2638 gomp_mutex_lock (&team->task_lock);
2639 if (task->kind != GOMP_TASK_DETACHED)
2641 /* The task has not finished running yet. */
2642 gomp_debug (0,
2643 "omp_fulfill_event: %p event fulfilled for unfinished "
2644 "task\n", task);
2645 __atomic_store_n (&task->detach_team, NULL, MEMMODEL_RELAXED);
2646 gomp_mutex_unlock (&team->task_lock);
2647 return;
2650 gomp_debug (0, "omp_fulfill_event: %p event fulfilled for finished task\n",
2651 task);
2652 size_t new_tasks = gomp_task_run_post_handle_depend (task, team);
2653 gomp_task_run_post_remove_parent (task);
2654 gomp_clear_parent (&task->children_queue);
2655 gomp_task_run_post_remove_taskgroup (task);
2656 team->task_count--;
2657 team->task_detach_count--;
2659 int do_wake = 0;
2660 bool shackled_thread_p = team == gomp_thread ()->ts.team;
2661 if (new_tasks > 0)
2663 /* Wake up threads to run new tasks. */
2664 gomp_team_barrier_set_task_pending (&team->barrier);
2665 do_wake = team->nthreads - team->task_running_count;
2666 if (do_wake > new_tasks)
2667 do_wake = new_tasks;
2670 if (!shackled_thread_p
2671 && !do_wake
2672 && team->task_detach_count == 0
2673 && gomp_team_barrier_waiting_for_tasks (&team->barrier))
2674 /* Ensure that at least one thread is woken up to signal that the
2675 barrier can finish. */
2676 do_wake = 1;
2678 /* If we are running in an unshackled thread, the team might vanish before
2679 gomp_team_barrier_wake is run if we release the lock first, so keep the
2680 lock for the call in that case. */
2681 if (shackled_thread_p)
2682 gomp_mutex_unlock (&team->task_lock);
2683 if (do_wake)
2684 gomp_team_barrier_wake (&team->barrier, do_wake);
2685 if (!shackled_thread_p)
2686 gomp_mutex_unlock (&team->task_lock);
2688 gomp_finish_task (task);
2689 free (task);
2692 ialias (omp_fulfill_event)
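/* Editorial usage sketch for the detach/fulfill mechanism handled by
   omp_fulfill_event above (start_async_read and buf are hypothetical;
   assumes OpenMP 5.0 detachable task support):

       omp_event_handle_t ev;
       #pragma omp task detach(ev)
       start_async_read (buf, &ev);
       ...
       later, from the completion callback of the asynchronous operation:
       omp_fulfill_event (ev);

   The detached task only completes once both its body has returned and
   the event has been fulfilled.  omp_fulfill_event may be called from a
   thread outside the team, which is the "unshackled" case the locking
   above is careful about.  */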