libgomp/task.c
1 /* Copyright (C) 2007-2021 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file handles the maintenance of tasks in response to task
27 creation and termination. */
29 #include "libgomp.h"
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33 #include "gomp-constants.h"
35 typedef struct gomp_task_depend_entry *hash_entry_type;
37 static inline void *
38 htab_alloc (size_t size)
40 return gomp_malloc (size);
43 static inline void
44 htab_free (void *ptr)
46 free (ptr);
49 #include "hashtab.h"
51 static inline hashval_t
52 htab_hash (hash_entry_type element)
54 return hash_pointer (element->addr);
57 static inline bool
58 htab_eq (hash_entry_type x, hash_entry_type y)
60 return x->addr == y->addr;
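/* Dependence entries are thus keyed purely by their address; each hash
   slot chains every live gomp_task_depend_entry recorded for that
   address through the entries' next/prev links.  */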
63 /* Create a new task data structure. */
65 void
66 gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
67 struct gomp_task_icv *prev_icv)
69 /* It would seem that using memset here would be a win, but it turns
70 out that partially filling gomp_task allows us to keep the
71 overhead of task creation low. In the nqueens-1.c test, for a
72 sufficiently large N, we drop the overhead from 5-6% to 1%.
74 Note, the nqueens-1.c test in serial mode is a good test to
75 benchmark the overhead of creating tasks as there are millions of
76 tiny tasks created that all run undeferred. */
77 task->parent = parent_task;
78 priority_queue_init (&task->children_queue);
79 task->taskgroup = NULL;
80 task->dependers = NULL;
81 task->depend_hash = NULL;
82 task->taskwait = NULL;
83 task->depend_count = 0;
84 task->completion_sem = NULL;
85 task->deferred_p = false;
86 task->icv = *prev_icv;
87 task->kind = GOMP_TASK_IMPLICIT;
88 task->in_tied_task = false;
89 task->final_task = false;
90 task->copy_ctors_done = false;
91 task->parent_depends_on = false;
94 /* Clean up a task, after completing it. */
96 void
97 gomp_end_task (void)
99 struct gomp_thread *thr = gomp_thread ();
100 struct gomp_task *task = thr->task;
102 gomp_finish_task (task);
103 thr->task = task->parent;
106 /* Clear the parent field of every task in LIST. */
108 static inline void
109 gomp_clear_parent_in_list (struct priority_list *list)
111 struct priority_node *p = list->tasks;
112 if (p)
115 priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
116 p = p->next;
118 while (p != list->tasks);
121 /* Splay tree version of gomp_clear_parent_in_list.
123 Clear the parent field of every task in NODE within SP, and free
124 the node when done. */
126 static void
127 gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
129 if (!node)
130 return;
131 prio_splay_tree_node left = node->left, right = node->right;
132 gomp_clear_parent_in_list (&node->key.l);
133 #if _LIBGOMP_CHECKING_
134 memset (node, 0xaf, sizeof (*node));
135 #endif
136 /* No need to remove the node from the tree. We're nuking
137 everything, so just free the nodes and our caller can clear the
138 entire splay tree. */
139 free (node);
140 gomp_clear_parent_in_tree (sp, left);
141 gomp_clear_parent_in_tree (sp, right);
144 /* Clear the parent field of every task in Q and remove every task
145 from Q. */
147 static inline void
148 gomp_clear_parent (struct priority_queue *q)
150 if (priority_queue_multi_p (q))
152 gomp_clear_parent_in_tree (&q->t, q->t.root);
153 /* All the nodes have been cleared in gomp_clear_parent_in_tree.
154 No need to remove anything. We can just nuke everything. */
155 q->t.root = NULL;
157 else
158 gomp_clear_parent_in_list (&q->l);
161 /* Helper function for GOMP_task and gomp_create_target_task.
163 For a TASK with in/out dependencies, fill in the various dependency
164 queues. PARENT is the parent of said task. DEPEND is as in
165 GOMP_task. */
167 static void
168 gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
169 void **depend)
171 size_t ndepend = (uintptr_t) depend[0];
172 size_t i;
173 hash_entry_type ent;
175 if (ndepend)
177 /* depend[0] is total # */
178 size_t nout = (uintptr_t) depend[1]; /* # of out: and inout: */
179 /* ndepend - nout is # of in: */
180 for (i = 0; i < ndepend; i++)
182 task->depend[i].addr = depend[2 + i];
183 task->depend[i].is_in = i >= nout;
186 else
188 ndepend = (uintptr_t) depend[1]; /* total # */
189 size_t nout = (uintptr_t) depend[2]; /* # of out: and inout: */
190 size_t nmutexinoutset = (uintptr_t) depend[3]; /* # of mutexinoutset: */
191 /* For now we treat mutexinoutset like out, which is compliant, but
192 inefficient. */
193 size_t nin = (uintptr_t) depend[4]; /* # of in: */
194 /* ndepend - nout - nmutexinoutset - nin is # of depobjs */
195 size_t normal = nout + nmutexinoutset + nin;
196 size_t n = 0;
197 for (i = normal; i < ndepend; i++)
199 void **d = (void **) (uintptr_t) depend[5 + i];
200 switch ((uintptr_t) d[1])
202 case GOMP_DEPEND_OUT:
203 case GOMP_DEPEND_INOUT:
204 case GOMP_DEPEND_MUTEXINOUTSET:
205 break;
206 case GOMP_DEPEND_IN:
207 continue;
208 default:
209 gomp_fatal ("unknown omp_depend_t dependence type %d",
210 (int) (uintptr_t) d[1]);
212 task->depend[n].addr = d[0];
213 task->depend[n++].is_in = 0;
215 for (i = 0; i < normal; i++)
217 task->depend[n].addr = depend[5 + i];
218 task->depend[n++].is_in = i >= nout + nmutexinoutset;
220 for (i = normal; i < ndepend; i++)
222 void **d = (void **) (uintptr_t) depend[5 + i];
223 if ((uintptr_t) d[1] != GOMP_DEPEND_IN)
224 continue;
225 task->depend[n].addr = d[0];
226 task->depend[n++].is_in = 1;
229 task->depend_count = ndepend;
230 task->num_dependees = 0;
231 if (parent->depend_hash == NULL)
232 parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
233 for (i = 0; i < ndepend; i++)
235 task->depend[i].next = NULL;
236 task->depend[i].prev = NULL;
237 task->depend[i].task = task;
238 task->depend[i].redundant = false;
239 task->depend[i].redundant_out = false;
241 hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
242 &task->depend[i], INSERT);
243 hash_entry_type out = NULL, last = NULL;
244 if (*slot)
246 /* If multiple depend clauses of this task reference the same address,
247 all but the first one are redundant.  As inout/out come first, if any
248 of them is inout/out, it will win, which is the right semantics. */
249 if ((*slot)->task == task)
251 task->depend[i].redundant = true;
252 continue;
254 for (ent = *slot; ent; ent = ent->next)
256 if (ent->redundant_out)
257 break;
259 last = ent;
261 /* depend(in:...) doesn't depend on earlier depend(in:...). */
262 if (task->depend[i].is_in && ent->is_in)
263 continue;
265 if (!ent->is_in)
266 out = ent;
268 struct gomp_task *tsk = ent->task;
269 if (tsk->dependers == NULL)
271 tsk->dependers
272 = gomp_malloc (sizeof (struct gomp_dependers_vec)
273 + 6 * sizeof (struct gomp_task *));
274 tsk->dependers->n_elem = 1;
275 tsk->dependers->allocated = 6;
276 tsk->dependers->elem[0] = task;
277 task->num_dependees++;
278 continue;
280 /* We already have some other dependency on tsk from earlier
281 depend clause. */
282 else if (tsk->dependers->n_elem
283 && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
284 == task))
285 continue;
286 else if (tsk->dependers->n_elem == tsk->dependers->allocated)
288 tsk->dependers->allocated
289 = tsk->dependers->allocated * 2 + 2;
290 tsk->dependers
291 = gomp_realloc (tsk->dependers,
292 sizeof (struct gomp_dependers_vec)
293 + (tsk->dependers->allocated
294 * sizeof (struct gomp_task *)));
296 tsk->dependers->elem[tsk->dependers->n_elem++] = task;
297 task->num_dependees++;
299 task->depend[i].next = *slot;
300 (*slot)->prev = &task->depend[i];
302 *slot = &task->depend[i];
304 /* There is no need to store more than one depend({,in}out:) task per
305 address in the hash table chain for the purpose of creation of
306 deferred tasks, because each out depends on all earlier outs, thus it
307 is enough to record just the last depend({,in}out:). For depend(in:),
308 we need to keep all of the previous ones not terminated yet, because
309 a later depend({,in}out:) might need to depend on all of them. So, if
310 the new task's clause is depend({,in}out:), we know there is at most
311 one other depend({,in}out:) clause in the list (out). For
312 non-deferred tasks we want to see all outs, so they are moved to the
313 end of the chain, after first redundant_out entry all following
314 entries should be redundant_out. */
315 if (!task->depend[i].is_in && out)
317 if (out != last)
319 out->next->prev = out->prev;
320 out->prev->next = out->next;
321 out->next = last->next;
322 out->prev = last;
323 last->next = out;
324 if (out->next)
325 out->next->prev = out;
327 out->redundant_out = true;
332 /* Called when encountering an explicit task directive. If IF_CLAUSE is
333 false, then we must not delay in executing the task. If UNTIED is true,
334 then the task may be executed by any member of the team.
336 DEPEND is an array containing:
337 if depend[0] is non-zero, then:
338 depend[0]: number of depend elements.
339 depend[1]: number of depend elements of type "out/inout".
340 depend[2..N+1]: address of [1..N]th depend element.
341 otherwise, when depend[0] is zero, then:
342 depend[1]: number of depend elements.
343 depend[2]: number of depend elements of type "out/inout".
344 depend[3]: number of depend elements of type "mutexinoutset".
345 depend[4]: number of depend elements of type "in".
346 depend[5..4+depend[2]+depend[3]+depend[4]]: address of depend elements
347 depend[5+depend[2]+depend[3]+depend[4]..4+depend[1]]: address of
348 omp_depend_t objects. */
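/* For instance, a task with depend(out: x) depend(in: y, z) lowered
   using the first encoding above would be described by depend[0] == 3,
   depend[1] == 1, depend[2] == &x, depend[3] == &y and depend[4] == &z;
   the out/inout addresses always precede the in addresses.  */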
350 void
351 GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
352 long arg_size, long arg_align, bool if_clause, unsigned flags,
353 void **depend, int priority_arg, void *detach)
355 struct gomp_thread *thr = gomp_thread ();
356 struct gomp_team *team = thr->ts.team;
357 int priority = 0;
359 #ifdef HAVE_BROKEN_POSIX_SEMAPHORES
360 /* If pthread_mutex_* is used for omp_*lock*, then each task must be
361 tied to one thread all the time. This means UNTIED tasks must be
362 tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
363 might be running on a different thread than FN. */
364 if (cpyfn)
365 if_clause = false;
366 flags &= ~GOMP_TASK_FLAG_UNTIED;
367 #endif
369 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
370 if (__builtin_expect (gomp_cancel_var, 0) && team)
372 if (gomp_team_barrier_cancelled (&team->barrier))
373 return;
374 if (thr->task->taskgroup)
376 if (thr->task->taskgroup->cancelled)
377 return;
378 if (thr->task->taskgroup->workshare
379 && thr->task->taskgroup->prev
380 && thr->task->taskgroup->prev->cancelled)
381 return;
385 if (__builtin_expect ((flags & GOMP_TASK_FLAG_PRIORITY) != 0, 0))
387 priority = priority_arg;
388 if (priority > gomp_max_task_priority_var)
389 priority = gomp_max_task_priority_var;
392 if (!if_clause || team == NULL
393 || (thr->task && thr->task->final_task)
394 || team->task_count > 64 * team->nthreads)
396 struct gomp_task task;
397 gomp_sem_t completion_sem;
399 /* If there are depend clauses and earlier deferred sibling tasks
400 with depend clauses, check if there isn't a dependency. If there
401 is, we need to wait for them. There is no need to handle
402 depend clauses for non-deferred tasks other than this, because
403 the parent task is suspended until the child task finishes and thus
404 it can't start further child tasks. */
405 if ((flags & GOMP_TASK_FLAG_DEPEND)
406 && thr->task && thr->task->depend_hash)
407 gomp_task_maybe_wait_for_dependencies (depend);
409 gomp_init_task (&task, thr->task, gomp_icv (false));
410 task.kind = GOMP_TASK_UNDEFERRED;
411 task.final_task = (thr->task && thr->task->final_task)
412 || (flags & GOMP_TASK_FLAG_FINAL);
413 task.priority = priority;
415 if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
417 gomp_sem_init (&completion_sem, 0);
418 task.completion_sem = &completion_sem;
419 *(void **) detach = &task;
420 if (data)
421 *(void **) data = &task;
423 gomp_debug (0, "Thread %d: new event: %p\n",
424 thr->ts.team_id, &task);
427 if (thr->task)
429 task.in_tied_task = thr->task->in_tied_task;
430 task.taskgroup = thr->task->taskgroup;
432 thr->task = &task;
433 if (__builtin_expect (cpyfn != NULL, 0))
435 char buf[arg_size + arg_align - 1];
436 char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
437 & ~(uintptr_t) (arg_align - 1));
438 cpyfn (arg, data);
439 fn (arg);
441 else
442 fn (data);
444 if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
446 gomp_sem_wait (&completion_sem);
447 gomp_sem_destroy (&completion_sem);
450 /* Access to "children" is normally done inside a task_lock
451 mutex region, but the only way this particular task.children
452 can be set is if this thread's task work function (fn)
453 creates children. So since the setter is *this* thread, we
454 need no barriers here when testing for non-NULL. We can have
455 task.children set by the current thread then changed by a
456 child thread, but seeing a stale non-NULL value is not a
457 problem. Once past the task_lock acquisition, this thread
458 will see the real value of task.children. */
459 if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
461 gomp_mutex_lock (&team->task_lock);
462 gomp_clear_parent (&task.children_queue);
463 gomp_mutex_unlock (&team->task_lock);
465 gomp_end_task ();
467 else
469 struct gomp_task *task;
470 struct gomp_task *parent = thr->task;
471 struct gomp_taskgroup *taskgroup = parent->taskgroup;
472 char *arg;
473 bool do_wake;
474 size_t depend_size = 0;
476 if (flags & GOMP_TASK_FLAG_DEPEND)
477 depend_size = ((uintptr_t) (depend[0] ? depend[0] : depend[1])
478 * sizeof (struct gomp_task_depend_entry));
479 task = gomp_malloc (sizeof (*task) + depend_size
480 + arg_size + arg_align - 1);
481 arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
482 & ~(uintptr_t) (arg_align - 1));
483 gomp_init_task (task, parent, gomp_icv (false));
484 task->priority = priority;
485 task->kind = GOMP_TASK_UNDEFERRED;
486 task->in_tied_task = parent->in_tied_task;
487 task->taskgroup = taskgroup;
488 task->deferred_p = true;
489 if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
491 task->detach_team = team;
493 *(void **) detach = task;
494 if (data)
495 *(void **) data = task;
497 gomp_debug (0, "Thread %d: new event: %p\n", thr->ts.team_id, task);
499 thr->task = task;
500 if (cpyfn)
502 cpyfn (arg, data);
503 task->copy_ctors_done = true;
505 else
506 memcpy (arg, data, arg_size);
507 thr->task = parent;
508 task->kind = GOMP_TASK_WAITING;
509 task->fn = fn;
510 task->fn_data = arg;
511 task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
512 gomp_mutex_lock (&team->task_lock);
513 /* If parallel or taskgroup has been cancelled, don't start new
514 tasks. */
515 if (__builtin_expect (gomp_cancel_var, 0)
516 && !task->copy_ctors_done)
518 if (gomp_team_barrier_cancelled (&team->barrier))
520 do_cancel:
521 gomp_mutex_unlock (&team->task_lock);
522 gomp_finish_task (task);
523 free (task);
524 return;
526 if (taskgroup)
528 if (taskgroup->cancelled)
529 goto do_cancel;
530 if (taskgroup->workshare
531 && taskgroup->prev
532 && taskgroup->prev->cancelled)
533 goto do_cancel;
536 if (taskgroup)
537 taskgroup->num_children++;
538 if (depend_size)
540 gomp_task_handle_depend (task, parent, depend);
541 if (task->num_dependees)
543 /* Tasks that depend on other tasks are not put into the
544 various waiting queues, so we are done for now. Said
545 tasks are instead put into the queues via
546 gomp_task_run_post_handle_dependers() after their
547 dependencies have been satisfied. After which, they
548 can be picked up by the various scheduling
549 points. */
550 gomp_mutex_unlock (&team->task_lock);
551 return;
555 priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
556 task, priority,
557 PRIORITY_INSERT_BEGIN,
558 /*adjust_parent_depends_on=*/false,
559 task->parent_depends_on);
560 if (taskgroup)
561 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
562 task, priority,
563 PRIORITY_INSERT_BEGIN,
564 /*adjust_parent_depends_on=*/false,
565 task->parent_depends_on);
567 priority_queue_insert (PQ_TEAM, &team->task_queue,
568 task, priority,
569 PRIORITY_INSERT_END,
570 /*adjust_parent_depends_on=*/false,
571 task->parent_depends_on);
573 ++team->task_count;
574 ++team->task_queued_count;
575 gomp_team_barrier_set_task_pending (&team->barrier);
576 do_wake = team->task_running_count + !parent->in_tied_task
577 < team->nthreads;
578 gomp_mutex_unlock (&team->task_lock);
579 if (do_wake)
580 gomp_team_barrier_wake (&team->barrier, 1);
584 ialias (GOMP_taskgroup_start)
585 ialias (GOMP_taskgroup_end)
586 ialias (GOMP_taskgroup_reduction_register)
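/* Instantiate the taskloop implementation twice: once for signed long
   iteration variables (GOMP_taskloop) and once for unsigned long long
   iteration variables (GOMP_taskloop_ull).  */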
588 #define TYPE long
589 #define UTYPE unsigned long
590 #define TYPE_is_long 1
591 #include "taskloop.c"
592 #undef TYPE
593 #undef UTYPE
594 #undef TYPE_is_long
596 #define TYPE unsigned long long
597 #define UTYPE TYPE
598 #define GOMP_taskloop GOMP_taskloop_ull
599 #include "taskloop.c"
600 #undef TYPE
601 #undef UTYPE
602 #undef GOMP_taskloop
604 static void inline
605 priority_queue_move_task_first (enum priority_queue_type type,
606 struct priority_queue *head,
607 struct gomp_task *task)
609 #if _LIBGOMP_CHECKING_
610 if (!priority_queue_task_in_queue_p (type, head, task))
611 gomp_fatal ("Attempt to move first missing task %p", task);
612 #endif
613 struct priority_list *list;
614 if (priority_queue_multi_p (head))
616 list = priority_queue_lookup_priority (head, task->priority);
617 #if _LIBGOMP_CHECKING_
618 if (!list)
619 gomp_fatal ("Unable to find priority %d", task->priority);
620 #endif
622 else
623 list = &head->l;
624 priority_list_remove (list, task_to_priority_node (type, task), 0);
625 priority_list_insert (type, list, task, task->priority,
626 PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
627 task->parent_depends_on);
630 /* Actual body of GOMP_PLUGIN_target_task_completion that is executed
631 with team->task_lock held, or is executed in the thread that called
632 gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
633 run before it acquires team->task_lock. */
635 static void
636 gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
638 struct gomp_task *parent = task->parent;
639 if (parent)
640 priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
641 task);
643 struct gomp_taskgroup *taskgroup = task->taskgroup;
644 if (taskgroup)
645 priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
646 task);
648 priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
649 PRIORITY_INSERT_BEGIN, false,
650 task->parent_depends_on);
651 task->kind = GOMP_TASK_WAITING;
652 if (parent && parent->taskwait)
654 if (parent->taskwait->in_taskwait)
656 /* One more task has had its dependencies met.
657 Inform any waiters. */
658 parent->taskwait->in_taskwait = false;
659 gomp_sem_post (&parent->taskwait->taskwait_sem);
661 else if (parent->taskwait->in_depend_wait)
663 /* One more task has had its dependencies met.
664 Inform any waiters. */
665 parent->taskwait->in_depend_wait = false;
666 gomp_sem_post (&parent->taskwait->taskwait_sem);
669 if (taskgroup && taskgroup->in_taskgroup_wait)
671 /* One more task has had its dependencies met.
672 Inform any waiters. */
673 taskgroup->in_taskgroup_wait = false;
674 gomp_sem_post (&taskgroup->taskgroup_sem);
677 ++team->task_queued_count;
678 gomp_team_barrier_set_task_pending (&team->barrier);
679 /* I'm afraid this can't be done after releasing team->task_lock,
680 as gomp_target_task_completion is run from unrelated thread and
681 therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
682 the team could be gone already. */
683 if (team->nthreads > team->task_running_count)
684 gomp_team_barrier_wake (&team->barrier, 1);
687 /* Signal that a target task TTASK has completed the asynchronously
688 running phase and should be requeued as a task to handle the
689 variable unmapping. */
691 void
692 GOMP_PLUGIN_target_task_completion (void *data)
694 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
695 struct gomp_task *task = ttask->task;
696 struct gomp_team *team = ttask->team;
698 gomp_mutex_lock (&team->task_lock);
699 if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN)
701 ttask->state = GOMP_TARGET_TASK_FINISHED;
702 gomp_mutex_unlock (&team->task_lock);
703 return;
705 ttask->state = GOMP_TARGET_TASK_FINISHED;
706 gomp_target_task_completion (team, task);
707 gomp_mutex_unlock (&team->task_lock);
710 static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);
712 /* Called for nowait target tasks. */
714 bool
715 gomp_create_target_task (struct gomp_device_descr *devicep,
716 void (*fn) (void *), size_t mapnum, void **hostaddrs,
717 size_t *sizes, unsigned short *kinds,
718 unsigned int flags, void **depend, void **args,
719 enum gomp_target_task_state state)
721 struct gomp_thread *thr = gomp_thread ();
722 struct gomp_team *team = thr->ts.team;
724 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
725 if (__builtin_expect (gomp_cancel_var, 0) && team)
727 if (gomp_team_barrier_cancelled (&team->barrier))
728 return true;
729 if (thr->task->taskgroup)
731 if (thr->task->taskgroup->cancelled)
732 return true;
733 if (thr->task->taskgroup->workshare
734 && thr->task->taskgroup->prev
735 && thr->task->taskgroup->prev->cancelled)
736 return true;
740 struct gomp_target_task *ttask;
741 struct gomp_task *task;
742 struct gomp_task *parent = thr->task;
743 struct gomp_taskgroup *taskgroup = parent->taskgroup;
744 bool do_wake;
745 size_t depend_size = 0;
746 uintptr_t depend_cnt = 0;
747 size_t tgt_align = 0, tgt_size = 0;
748 uintptr_t args_cnt = 0;
750 if (depend != NULL)
752 depend_cnt = (uintptr_t) (depend[0] ? depend[0] : depend[1]);
753 depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
755 if (fn)
757 /* GOMP_MAP_FIRSTPRIVATE mappings need to be copied first, as they are
758 firstprivate on the target task. */
759 size_t i;
760 for (i = 0; i < mapnum; i++)
761 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
763 size_t align = (size_t) 1 << (kinds[i] >> 8);
764 if (tgt_align < align)
765 tgt_align = align;
766 tgt_size = (tgt_size + align - 1) & ~(align - 1);
767 tgt_size += sizes[i];
769 if (tgt_align)
770 tgt_size += tgt_align - 1;
771 else
772 tgt_size = 0;
773 if (args)
775 void **cargs = args;
776 while (*cargs)
778 intptr_t id = (intptr_t) *cargs++;
779 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
780 cargs++;
782 args_cnt = cargs + 1 - args;
786 task = gomp_malloc (sizeof (*task) + depend_size
787 + sizeof (*ttask)
788 + args_cnt * sizeof (void *)
789 + mapnum * (sizeof (void *) + sizeof (size_t)
790 + sizeof (unsigned short))
791 + tgt_size);
792 gomp_init_task (task, parent, gomp_icv (false));
793 task->priority = 0;
794 task->kind = GOMP_TASK_WAITING;
795 task->in_tied_task = parent->in_tied_task;
796 task->taskgroup = taskgroup;
797 ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
798 ttask->devicep = devicep;
799 ttask->fn = fn;
800 ttask->mapnum = mapnum;
801 memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
802 if (args_cnt)
804 ttask->args = (void **) &ttask->hostaddrs[mapnum];
805 memcpy (ttask->args, args, args_cnt * sizeof (void *));
806 ttask->sizes = (size_t *) &ttask->args[args_cnt];
808 else
810 ttask->args = args;
811 ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
813 memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
814 ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
815 memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
816 if (tgt_align)
818 char *tgt = (char *) &ttask->kinds[mapnum];
819 size_t i;
820 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
821 if (al)
822 tgt += tgt_align - al;
823 tgt_size = 0;
824 for (i = 0; i < mapnum; i++)
825 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
827 size_t align = (size_t) 1 << (kinds[i] >> 8);
828 tgt_size = (tgt_size + align - 1) & ~(align - 1);
829 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
830 ttask->hostaddrs[i] = tgt + tgt_size;
831 tgt_size = tgt_size + sizes[i];
834 ttask->flags = flags;
835 ttask->state = state;
836 ttask->task = task;
837 ttask->team = team;
838 task->fn = NULL;
839 task->fn_data = ttask;
840 task->final_task = 0;
841 gomp_mutex_lock (&team->task_lock);
842 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
843 if (__builtin_expect (gomp_cancel_var, 0))
845 if (gomp_team_barrier_cancelled (&team->barrier))
847 do_cancel:
848 gomp_mutex_unlock (&team->task_lock);
849 gomp_finish_task (task);
850 free (task);
851 return true;
853 if (taskgroup)
855 if (taskgroup->cancelled)
856 goto do_cancel;
857 if (taskgroup->workshare
858 && taskgroup->prev
859 && taskgroup->prev->cancelled)
860 goto do_cancel;
863 if (depend_size)
865 gomp_task_handle_depend (task, parent, depend);
866 if (task->num_dependees)
868 if (taskgroup)
869 taskgroup->num_children++;
870 gomp_mutex_unlock (&team->task_lock);
871 return true;
874 if (state == GOMP_TARGET_TASK_DATA)
876 gomp_task_run_post_handle_depend_hash (task);
877 gomp_mutex_unlock (&team->task_lock);
878 gomp_finish_task (task);
879 free (task);
880 return false;
882 if (taskgroup)
883 taskgroup->num_children++;
884 /* For async offloading, if we don't need to wait for dependencies,
885 run the gomp_target_task_fn right away, essentially schedule the
886 mapping part of the task in the current thread. */
887 if (devicep != NULL
888 && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
890 priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
891 PRIORITY_INSERT_END,
892 /*adjust_parent_depends_on=*/false,
893 task->parent_depends_on);
894 if (taskgroup)
895 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
896 task, 0, PRIORITY_INSERT_END,
897 /*adjust_parent_depends_on=*/false,
898 task->parent_depends_on);
899 task->pnode[PQ_TEAM].next = NULL;
900 task->pnode[PQ_TEAM].prev = NULL;
901 task->kind = GOMP_TASK_TIED;
902 ++team->task_count;
903 gomp_mutex_unlock (&team->task_lock);
905 thr->task = task;
906 gomp_target_task_fn (task->fn_data);
907 thr->task = parent;
909 gomp_mutex_lock (&team->task_lock);
910 task->kind = GOMP_TASK_ASYNC_RUNNING;
911 /* If GOMP_PLUGIN_target_task_completion has run already
912 in between gomp_target_task_fn and the mutex lock,
913 perform the requeuing here. */
914 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
915 gomp_target_task_completion (team, task);
916 else
917 ttask->state = GOMP_TARGET_TASK_RUNNING;
918 gomp_mutex_unlock (&team->task_lock);
919 return true;
921 priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
922 PRIORITY_INSERT_BEGIN,
923 /*adjust_parent_depends_on=*/false,
924 task->parent_depends_on);
925 if (taskgroup)
926 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
927 PRIORITY_INSERT_BEGIN,
928 /*adjust_parent_depends_on=*/false,
929 task->parent_depends_on);
930 priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
931 PRIORITY_INSERT_END,
932 /*adjust_parent_depends_on=*/false,
933 task->parent_depends_on);
934 ++team->task_count;
935 ++team->task_queued_count;
936 gomp_team_barrier_set_task_pending (&team->barrier);
937 do_wake = team->task_running_count + !parent->in_tied_task
938 < team->nthreads;
939 gomp_mutex_unlock (&team->task_lock);
940 if (do_wake)
941 gomp_team_barrier_wake (&team->barrier, 1);
942 return true;
945 /* Given a parent_depends_on task in LIST, move it to the front of its
946 priority so it is run as soon as possible.
948 Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.
950 We rearrange the queue such that all parent_depends_on tasks are
951 first, and last_parent_depends_on points to the last such task we
952 rearranged. For example, given the following tasks in a queue
953 where PD[123] are the parent_depends_on tasks:
955 task->children
958 C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4
960 We rearrange such that:
962 task->children
963 | +--- last_parent_depends_on
966 PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4. */
968 static void inline
969 priority_list_upgrade_task (struct priority_list *list,
970 struct priority_node *node)
972 struct priority_node *last_parent_depends_on
973 = list->last_parent_depends_on;
974 if (last_parent_depends_on)
976 node->prev->next = node->next;
977 node->next->prev = node->prev;
978 node->prev = last_parent_depends_on;
979 node->next = last_parent_depends_on->next;
980 node->prev->next = node;
981 node->next->prev = node;
983 else if (node != list->tasks)
985 node->prev->next = node->next;
986 node->next->prev = node->prev;
987 node->prev = list->tasks->prev;
988 node->next = list->tasks;
989 list->tasks = node;
990 node->prev->next = node;
991 node->next->prev = node;
993 list->last_parent_depends_on = node;
996 /* Given a parent_depends_on TASK in its parent's children_queue, move
997 it to the front of its priority so it is run as soon as possible.
999 PARENT is passed as an optimization.
1001 (This function could be defined in priority_queue.c, but we want it
1002 inlined, and putting it in priority_queue.h is not an option, given
1003 that gomp_task has not been properly defined at that point). */
1005 static void inline
1006 priority_queue_upgrade_task (struct gomp_task *task,
1007 struct gomp_task *parent)
1009 struct priority_queue *head = &parent->children_queue;
1010 struct priority_node *node = &task->pnode[PQ_CHILDREN];
1011 #if _LIBGOMP_CHECKING_
1012 if (!task->parent_depends_on)
1013 gomp_fatal ("priority_queue_upgrade_task: task must be a "
1014 "parent_depends_on task");
1015 if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
1016 gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
1017 #endif
1018 if (priority_queue_multi_p (head))
1020 struct priority_list *list
1021 = priority_queue_lookup_priority (head, task->priority);
1022 priority_list_upgrade_task (list, node);
1024 else
1025 priority_list_upgrade_task (&head->l, node);
1028 /* Given a CHILD_TASK in LIST that is about to be executed, move it out of
1029 the way in LIST so that other tasks can be considered for
1030 execution. LIST contains tasks of type TYPE.
1032 Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
1033 if applicable. */
1035 static void inline
1036 priority_list_downgrade_task (enum priority_queue_type type,
1037 struct priority_list *list,
1038 struct gomp_task *child_task)
1040 struct priority_node *node = task_to_priority_node (type, child_task);
1041 if (list->tasks == node)
1042 list->tasks = node->next;
1043 else if (node->next != list->tasks)
1045 /* The task in NODE is about to become TIED and TIED tasks
1046 cannot come before WAITING tasks. If we're about to
1047 leave the queue in such an indeterminate state, rewire
1048 things appropriately. However, a TIED task at the end is
1049 perfectly fine. */
1050 struct gomp_task *next_task = priority_node_to_task (type, node->next);
1051 if (next_task->kind == GOMP_TASK_WAITING)
1053 /* Remove from list. */
1054 node->prev->next = node->next;
1055 node->next->prev = node->prev;
1056 /* Rewire at the end. */
1057 node->next = list->tasks;
1058 node->prev = list->tasks->prev;
1059 list->tasks->prev->next = node;
1060 list->tasks->prev = node;
1064 /* If the current task is the last_parent_depends_on for its
1065 priority, adjust last_parent_depends_on appropriately. */
1066 if (__builtin_expect (child_task->parent_depends_on, 0)
1067 && list->last_parent_depends_on == node)
1069 struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
1070 if (node->prev != node
1071 && prev_child->kind == GOMP_TASK_WAITING
1072 && prev_child->parent_depends_on)
1073 list->last_parent_depends_on = node->prev;
1074 else
1076 /* There are no more parent_depends_on entries waiting
1077 to run, clear the list. */
1078 list->last_parent_depends_on = NULL;
1083 /* Given a TASK in HEAD that is about to be executed, move it out of
1084 the way so that other tasks can be considered for execution. HEAD
1085 contains tasks of type TYPE.
1087 Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
1088 if applicable.
1090 (This function could be defined in priority_queue.c, but we want it
1091 inlined, and putting it in priority_queue.h is not an option, given
1092 that gomp_task has not been properly defined at that point). */
1094 static void inline
1095 priority_queue_downgrade_task (enum priority_queue_type type,
1096 struct priority_queue *head,
1097 struct gomp_task *task)
1099 #if _LIBGOMP_CHECKING_
1100 if (!priority_queue_task_in_queue_p (type, head, task))
1101 gomp_fatal ("Attempt to downgrade missing task %p", task);
1102 #endif
1103 if (priority_queue_multi_p (head))
1105 struct priority_list *list
1106 = priority_queue_lookup_priority (head, task->priority);
1107 priority_list_downgrade_task (type, list, task);
1109 else
1110 priority_list_downgrade_task (type, &head->l, task);
1113 /* Setup CHILD_TASK to execute. This is done by setting the task to
1114 TIED, and updating all relevant queues so that CHILD_TASK is no
1115 longer chosen for scheduling. Also, remove CHILD_TASK from the
1116 overall team task queue entirely.
1118 Return TRUE if task or its containing taskgroup has been
1119 cancelled. */
1121 static inline bool
1122 gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
1123 struct gomp_team *team)
1125 #if _LIBGOMP_CHECKING_
1126 if (child_task->parent)
1127 priority_queue_verify (PQ_CHILDREN,
1128 &child_task->parent->children_queue, true);
1129 if (child_task->taskgroup)
1130 priority_queue_verify (PQ_TASKGROUP,
1131 &child_task->taskgroup->taskgroup_queue, false);
1132 priority_queue_verify (PQ_TEAM, &team->task_queue, false);
1133 #endif
1135 /* Task is about to go tied, move it out of the way. */
1136 if (parent)
1137 priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
1138 child_task);
1140 /* Task is about to go tied, move it out of the way. */
1141 struct gomp_taskgroup *taskgroup = child_task->taskgroup;
1142 if (taskgroup)
1143 priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1144 child_task);
1146 priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
1147 MEMMODEL_RELAXED);
1148 child_task->pnode[PQ_TEAM].next = NULL;
1149 child_task->pnode[PQ_TEAM].prev = NULL;
1150 child_task->kind = GOMP_TASK_TIED;
1152 if (--team->task_queued_count == 0)
1153 gomp_team_barrier_clear_task_pending (&team->barrier);
1154 if (__builtin_expect (gomp_cancel_var, 0)
1155 && !child_task->copy_ctors_done)
1157 if (gomp_team_barrier_cancelled (&team->barrier))
1158 return true;
1159 if (taskgroup)
1161 if (taskgroup->cancelled)
1162 return true;
1163 if (taskgroup->workshare
1164 && taskgroup->prev
1165 && taskgroup->prev->cancelled)
1166 return true;
1169 return false;
1172 static void
1173 gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
1175 struct gomp_task *parent = child_task->parent;
1176 size_t i;
1178 for (i = 0; i < child_task->depend_count; i++)
1179 if (!child_task->depend[i].redundant)
1181 if (child_task->depend[i].next)
1182 child_task->depend[i].next->prev = child_task->depend[i].prev;
1183 if (child_task->depend[i].prev)
1184 child_task->depend[i].prev->next = child_task->depend[i].next;
1185 else
1187 hash_entry_type *slot
1188 = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
1189 NO_INSERT);
1190 if (*slot != &child_task->depend[i])
1191 abort ();
1192 if (child_task->depend[i].next)
1193 *slot = child_task->depend[i].next;
1194 else
1195 htab_clear_slot (parent->depend_hash, slot);
1200 /* After a CHILD_TASK has been run, adjust the dependency queue for
1201 each task that depends on CHILD_TASK, to record the fact that there
1202 is one less dependency to worry about. If a task that depended on
1203 CHILD_TASK now has no dependencies, place it in the various queues
1204 so it gets scheduled to run.
1206 TEAM is the team to which CHILD_TASK belongs. */
1208 static size_t
1209 gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
1210 struct gomp_team *team)
1212 struct gomp_task *parent = child_task->parent;
1213 size_t i, count = child_task->dependers->n_elem, ret = 0;
1214 for (i = 0; i < count; i++)
1216 struct gomp_task *task = child_task->dependers->elem[i];
1218 /* CHILD_TASK satisfies a dependency for TASK. Keep track of
1219 TASK's remaining dependencies. Once TASK has no other
1220 dependencies, put it into the various queues so it will get
1221 scheduled for execution. */
1222 if (--task->num_dependees != 0)
1223 continue;
1225 struct gomp_taskgroup *taskgroup = task->taskgroup;
1226 if (parent)
1228 priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
1229 task, task->priority,
1230 PRIORITY_INSERT_BEGIN,
1231 /*adjust_parent_depends_on=*/true,
1232 task->parent_depends_on);
1233 if (parent->taskwait)
1235 if (parent->taskwait->in_taskwait)
1237 /* One more task has had its dependencies met.
1238 Inform any waiters. */
1239 parent->taskwait->in_taskwait = false;
1240 gomp_sem_post (&parent->taskwait->taskwait_sem);
1242 else if (parent->taskwait->in_depend_wait)
1244 /* One more task has had its dependencies met.
1245 Inform any waiters. */
1246 parent->taskwait->in_depend_wait = false;
1247 gomp_sem_post (&parent->taskwait->taskwait_sem);
1251 if (taskgroup)
1253 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1254 task, task->priority,
1255 PRIORITY_INSERT_BEGIN,
1256 /*adjust_parent_depends_on=*/false,
1257 task->parent_depends_on);
1258 if (taskgroup->in_taskgroup_wait)
1260 /* One more task has had its dependencies met.
1261 Inform any waiters. */
1262 taskgroup->in_taskgroup_wait = false;
1263 gomp_sem_post (&taskgroup->taskgroup_sem);
1266 priority_queue_insert (PQ_TEAM, &team->task_queue,
1267 task, task->priority,
1268 PRIORITY_INSERT_END,
1269 /*adjust_parent_depends_on=*/false,
1270 task->parent_depends_on);
1271 ++team->task_count;
1272 ++team->task_queued_count;
1273 ++ret;
1275 free (child_task->dependers);
1276 child_task->dependers = NULL;
1277 if (ret > 1)
1278 gomp_team_barrier_set_task_pending (&team->barrier);
1279 return ret;
1282 static inline size_t
1283 gomp_task_run_post_handle_depend (struct gomp_task *child_task,
1284 struct gomp_team *team)
1286 if (child_task->depend_count == 0)
1287 return 0;
1289 /* If parent is gone already, the hash table is freed and nothing
1290 will use the hash table anymore, no need to remove anything from it. */
1291 if (child_task->parent != NULL)
1292 gomp_task_run_post_handle_depend_hash (child_task);
1294 if (child_task->dependers == NULL)
1295 return 0;
1297 return gomp_task_run_post_handle_dependers (child_task, team);
1300 /* Remove CHILD_TASK from its parent. */
1302 static inline void
1303 gomp_task_run_post_remove_parent (struct gomp_task *child_task)
1305 struct gomp_task *parent = child_task->parent;
1306 if (parent == NULL)
1307 return;
1309 /* If this was the last task the parent was depending on,
1310 synchronize with gomp_task_maybe_wait_for_dependencies so it can
1311 clean up and return. */
1312 if (__builtin_expect (child_task->parent_depends_on, 0)
1313 && --parent->taskwait->n_depend == 0
1314 && parent->taskwait->in_depend_wait)
1316 parent->taskwait->in_depend_wait = false;
1317 gomp_sem_post (&parent->taskwait->taskwait_sem);
1320 if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
1321 child_task, MEMMODEL_RELEASE)
1322 && parent->taskwait && parent->taskwait->in_taskwait)
1324 parent->taskwait->in_taskwait = false;
1325 gomp_sem_post (&parent->taskwait->taskwait_sem);
1327 child_task->pnode[PQ_CHILDREN].next = NULL;
1328 child_task->pnode[PQ_CHILDREN].prev = NULL;
1331 /* Remove CHILD_TASK from its taskgroup. */
1333 static inline void
1334 gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
1336 struct gomp_taskgroup *taskgroup = child_task->taskgroup;
1337 if (taskgroup == NULL)
1338 return;
1339 bool empty = priority_queue_remove (PQ_TASKGROUP,
1340 &taskgroup->taskgroup_queue,
1341 child_task, MEMMODEL_RELAXED);
1342 child_task->pnode[PQ_TASKGROUP].next = NULL;
1343 child_task->pnode[PQ_TASKGROUP].prev = NULL;
1344 if (taskgroup->num_children > 1)
1345 --taskgroup->num_children;
1346 else
1348 /* We access taskgroup->num_children in GOMP_taskgroup_end
1349 outside of the task lock mutex region, so we
1350 need a release barrier here to ensure memory
1351 written by child_task->fn above is flushed
1352 before the NULL is written. */
1353 __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
1355 if (empty && taskgroup->in_taskgroup_wait)
1357 taskgroup->in_taskgroup_wait = false;
1358 gomp_sem_post (&taskgroup->taskgroup_sem);
1362 void
1363 gomp_barrier_handle_tasks (gomp_barrier_state_t state)
1365 struct gomp_thread *thr = gomp_thread ();
1366 struct gomp_team *team = thr->ts.team;
1367 struct gomp_task *task = thr->task;
1368 struct gomp_task *child_task = NULL;
1369 struct gomp_task *to_free = NULL;
1370 int do_wake = 0;
1372 gomp_mutex_lock (&team->task_lock);
1373 if (gomp_barrier_last_thread (state))
1375 if (team->task_count == 0)
1377 gomp_team_barrier_done (&team->barrier, state);
1378 gomp_mutex_unlock (&team->task_lock);
1379 gomp_team_barrier_wake (&team->barrier, 0);
1380 return;
1382 gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
1385 while (1)
1387 bool cancelled = false;
1389 if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
1391 bool ignored;
1392 child_task
1393 = priority_queue_next_task (PQ_TEAM, &team->task_queue,
1394 PQ_IGNORED, NULL,
1395 &ignored);
1396 cancelled = gomp_task_run_pre (child_task, child_task->parent,
1397 team);
1398 if (__builtin_expect (cancelled, 0))
1400 if (to_free)
1402 gomp_finish_task (to_free);
1403 free (to_free);
1404 to_free = NULL;
1406 goto finish_cancelled;
1408 team->task_running_count++;
1409 child_task->in_tied_task = true;
1411 else if (team->task_count == 0
1412 && gomp_team_barrier_waiting_for_tasks (&team->barrier))
1414 gomp_team_barrier_done (&team->barrier, state);
1415 gomp_mutex_unlock (&team->task_lock);
1416 gomp_team_barrier_wake (&team->barrier, 0);
1417 if (to_free)
1419 gomp_finish_task (to_free);
1420 free (to_free);
1422 return;
1424 gomp_mutex_unlock (&team->task_lock);
1425 if (do_wake)
1427 gomp_team_barrier_wake (&team->barrier, do_wake);
1428 do_wake = 0;
1430 if (to_free)
1432 gomp_finish_task (to_free);
1433 free (to_free);
1434 to_free = NULL;
1436 if (child_task)
1438 thr->task = child_task;
1439 if (__builtin_expect (child_task->fn == NULL, 0))
1441 if (gomp_target_task_fn (child_task->fn_data))
1443 thr->task = task;
1444 gomp_mutex_lock (&team->task_lock);
1445 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1446 team->task_running_count--;
1447 struct gomp_target_task *ttask
1448 = (struct gomp_target_task *) child_task->fn_data;
1449 /* If GOMP_PLUGIN_target_task_completion has run already
1450 in between gomp_target_task_fn and the mutex lock,
1451 perform the requeuing here. */
1452 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1453 gomp_target_task_completion (team, child_task);
1454 else
1455 ttask->state = GOMP_TARGET_TASK_RUNNING;
1456 child_task = NULL;
1457 continue;
1460 else
1461 child_task->fn (child_task->fn_data);
1462 thr->task = task;
1464 else
1465 return;
1466 gomp_mutex_lock (&team->task_lock);
1467 if (child_task)
1469 if (child_task->detach_team)
1471 assert (child_task->detach_team == team);
1472 child_task->kind = GOMP_TASK_DETACHED;
1473 ++team->task_detach_count;
1474 --team->task_running_count;
1475 gomp_debug (0,
1476 "thread %d: task with event %p finished without "
1477 "completion event fulfilled in team barrier\n",
1478 thr->ts.team_id, child_task);
1479 child_task = NULL;
1480 continue;
1483 finish_cancelled:;
1484 size_t new_tasks
1485 = gomp_task_run_post_handle_depend (child_task, team);
1486 gomp_task_run_post_remove_parent (child_task);
1487 gomp_clear_parent (&child_task->children_queue);
1488 gomp_task_run_post_remove_taskgroup (child_task);
1489 to_free = child_task;
1490 if (!cancelled)
1491 team->task_running_count--;
1492 child_task = NULL;
1493 if (new_tasks > 1)
1495 do_wake = team->nthreads - team->task_running_count;
1496 if (do_wake > new_tasks)
1497 do_wake = new_tasks;
1499 --team->task_count;
1504 /* Called when encountering a taskwait directive.
1506 Wait for all children of the current task. */
1508 void
1509 GOMP_taskwait (void)
1511 struct gomp_thread *thr = gomp_thread ();
1512 struct gomp_team *team = thr->ts.team;
1513 struct gomp_task *task = thr->task;
1514 struct gomp_task *child_task = NULL;
1515 struct gomp_task *to_free = NULL;
1516 struct gomp_taskwait taskwait;
1517 int do_wake = 0;
1519 /* The acquire barrier on load of task->children here synchronizes
1520 with the write of a NULL in gomp_task_run_post_remove_parent. It is
1521 not necessary that we synchronize with other non-NULL writes at
1522 this point, but we must ensure that all writes to memory by a
1523 child thread task work function are seen before we exit from
1524 GOMP_taskwait. */
1525 if (task == NULL
1526 || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
1527 return;
1529 memset (&taskwait, 0, sizeof (taskwait));
1530 bool child_q = false;
1531 gomp_mutex_lock (&team->task_lock);
1532 while (1)
1534 bool cancelled = false;
1535 if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
1537 bool destroy_taskwait = task->taskwait != NULL;
1538 task->taskwait = NULL;
1539 gomp_mutex_unlock (&team->task_lock);
1540 if (to_free)
1542 gomp_finish_task (to_free);
1543 free (to_free);
1545 if (destroy_taskwait)
1546 gomp_sem_destroy (&taskwait.taskwait_sem);
1547 return;
1549 struct gomp_task *next_task
1550 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1551 PQ_TEAM, &team->task_queue, &child_q);
1552 if (next_task->kind == GOMP_TASK_WAITING)
1554 child_task = next_task;
1555 cancelled
1556 = gomp_task_run_pre (child_task, task, team);
1557 if (__builtin_expect (cancelled, 0))
1559 if (to_free)
1561 gomp_finish_task (to_free);
1562 free (to_free);
1563 to_free = NULL;
1565 goto finish_cancelled;
1568 else
1570 /* All tasks we are waiting for are either running in other
1571 threads, are detached and waiting for the completion event to be
1572 fulfilled, or they are tasks that have not had their
1573 dependencies met (so they're not even in the queue). Wait
1574 for them. */
1575 if (task->taskwait == NULL)
1577 taskwait.in_depend_wait = false;
1578 gomp_sem_init (&taskwait.taskwait_sem, 0);
1579 task->taskwait = &taskwait;
1581 taskwait.in_taskwait = true;
1583 gomp_mutex_unlock (&team->task_lock);
1584 if (do_wake)
1586 gomp_team_barrier_wake (&team->barrier, do_wake);
1587 do_wake = 0;
1589 if (to_free)
1591 gomp_finish_task (to_free);
1592 free (to_free);
1593 to_free = NULL;
1595 if (child_task)
1597 thr->task = child_task;
1598 if (__builtin_expect (child_task->fn == NULL, 0))
1600 if (gomp_target_task_fn (child_task->fn_data))
1602 thr->task = task;
1603 gomp_mutex_lock (&team->task_lock);
1604 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1605 struct gomp_target_task *ttask
1606 = (struct gomp_target_task *) child_task->fn_data;
1607 /* If GOMP_PLUGIN_target_task_completion has run already
1608 in between gomp_target_task_fn and the mutex lock,
1609 perform the requeuing here. */
1610 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1611 gomp_target_task_completion (team, child_task);
1612 else
1613 ttask->state = GOMP_TARGET_TASK_RUNNING;
1614 child_task = NULL;
1615 continue;
1618 else
1619 child_task->fn (child_task->fn_data);
1620 thr->task = task;
1622 else
1623 gomp_sem_wait (&taskwait.taskwait_sem);
1624 gomp_mutex_lock (&team->task_lock);
1625 if (child_task)
1627 if (child_task->detach_team)
1629 assert (child_task->detach_team == team);
1630 child_task->kind = GOMP_TASK_DETACHED;
1631 ++team->task_detach_count;
1632 gomp_debug (0,
1633 "thread %d: task with event %p finished without "
1634 "completion event fulfilled in taskwait\n",
1635 thr->ts.team_id, child_task);
1636 child_task = NULL;
1637 continue;
1640 finish_cancelled:;
1641 size_t new_tasks
1642 = gomp_task_run_post_handle_depend (child_task, team);
1644 if (child_q)
1646 priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1647 child_task, MEMMODEL_RELAXED);
1648 child_task->pnode[PQ_CHILDREN].next = NULL;
1649 child_task->pnode[PQ_CHILDREN].prev = NULL;
1652 gomp_clear_parent (&child_task->children_queue);
1654 gomp_task_run_post_remove_taskgroup (child_task);
1656 to_free = child_task;
1657 child_task = NULL;
1658 team->task_count--;
1659 if (new_tasks > 1)
1661 do_wake = team->nthreads - team->task_running_count
1662 - !task->in_tied_task;
1663 if (do_wake > new_tasks)
1664 do_wake = new_tasks;
1670 /* Called when encountering a taskwait directive with depend clause(s).
1671 Wait as if it were a mergeable included task construct with an empty body. */
1673 void
1674 GOMP_taskwait_depend (void **depend)
1676 struct gomp_thread *thr = gomp_thread ();
1677 struct gomp_team *team = thr->ts.team;
1679 /* If parallel or taskgroup has been cancelled, return early. */
1680 if (__builtin_expect (gomp_cancel_var, 0) && team)
1682 if (gomp_team_barrier_cancelled (&team->barrier))
1683 return;
1684 if (thr->task->taskgroup)
1686 if (thr->task->taskgroup->cancelled)
1687 return;
1688 if (thr->task->taskgroup->workshare
1689 && thr->task->taskgroup->prev
1690 && thr->task->taskgroup->prev->cancelled)
1691 return;
1695 if (thr->task && thr->task->depend_hash)
1696 gomp_task_maybe_wait_for_dependencies (depend);
1699 /* An undeferred task is about to run. Wait for all tasks that this
1700 undeferred task depends on.
1702 This is done by first putting all known ready dependencies
1703 (dependencies that have their own dependencies met) at the top of
1704 the scheduling queues. Then we iterate through these imminently
1705 ready tasks (and possibly other high priority tasks), and run them.
1706 If we run out of ready dependencies to execute, we either wait for
1707 the remaining dependencies to finish, or wait for them to get
1708 scheduled so we can run them.
1710 DEPEND is as in GOMP_task. */
1712 void
1713 gomp_task_maybe_wait_for_dependencies (void **depend)
1715 struct gomp_thread *thr = gomp_thread ();
1716 struct gomp_task *task = thr->task;
1717 struct gomp_team *team = thr->ts.team;
1718 struct gomp_task_depend_entry elem, *ent = NULL;
1719 struct gomp_taskwait taskwait;
1720 size_t orig_ndepend = (uintptr_t) depend[0];
1721 size_t nout = (uintptr_t) depend[1];
1722 size_t ndepend = orig_ndepend;
1723 size_t normal = ndepend;
1724 size_t n = 2;
1725 size_t i;
1726 size_t num_awaited = 0;
1727 struct gomp_task *child_task = NULL;
1728 struct gomp_task *to_free = NULL;
1729 int do_wake = 0;
1731 if (ndepend == 0)
1733 ndepend = nout;
1734 nout = (uintptr_t) depend[2] + (uintptr_t) depend[3];
1735 normal = nout + (uintptr_t) depend[4];
1736 n = 5;
1738 gomp_mutex_lock (&team->task_lock);
1739 for (i = 0; i < ndepend; i++)
1741 elem.addr = depend[i + n];
1742 elem.is_in = i >= nout;
1743 if (__builtin_expect (i >= normal, 0))
1745 void **d = (void **) elem.addr;
1746 switch ((uintptr_t) d[1])
1748 case GOMP_DEPEND_IN:
1749 break;
1750 case GOMP_DEPEND_OUT:
1751 case GOMP_DEPEND_INOUT:
1752 case GOMP_DEPEND_MUTEXINOUTSET:
1753 elem.is_in = 0;
1754 break;
1755 default:
1756 gomp_fatal ("unknown omp_depend_t dependence type %d",
1757 (int) (uintptr_t) d[1]);
1759 elem.addr = d[0];
1761 ent = htab_find (task->depend_hash, &elem);
1762 for (; ent; ent = ent->next)
1763 if (elem.is_in && ent->is_in)
1764 continue;
1765 else
1767 struct gomp_task *tsk = ent->task;
1768 if (!tsk->parent_depends_on)
1770 tsk->parent_depends_on = true;
1771 ++num_awaited;
1772 /* If dependency TSK itself has no dependencies and is
1773 ready to run, move it up front so that we run it as
1774 soon as possible. */
1775 if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
1776 priority_queue_upgrade_task (tsk, task);
1780 if (num_awaited == 0)
1782 gomp_mutex_unlock (&team->task_lock);
1783 return;
1786 memset (&taskwait, 0, sizeof (taskwait));
1787 taskwait.n_depend = num_awaited;
1788 gomp_sem_init (&taskwait.taskwait_sem, 0);
1789 task->taskwait = &taskwait;
1791 while (1)
1793 bool cancelled = false;
1794 if (taskwait.n_depend == 0)
1796 task->taskwait = NULL;
1797 gomp_mutex_unlock (&team->task_lock);
1798 if (to_free)
1800 gomp_finish_task (to_free);
1801 free (to_free);
1803 gomp_sem_destroy (&taskwait.taskwait_sem);
1804 return;
1807 /* Theoretically when we have multiple priorities, we should
1808 choose between the highest priority item in
1809 task->children_queue and team->task_queue here, so we should
1810 use priority_queue_next_task(). However, since we are
1811 running an undeferred task, perhaps that makes all tasks it
1812 depends on undeferred, thus a priority of INF? This would
1813 make it unnecessary to take anything into account here,
1814 except the dependencies.
1816 On the other hand, if we want to use priority_queue_next_task(),
1817 care should be taken to only use priority_queue_remove()
1818 below if the task was actually removed from the children
1819 queue. */
1820 bool ignored;
1821 struct gomp_task *next_task
1822 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1823 PQ_IGNORED, NULL, &ignored);
1825 if (next_task->kind == GOMP_TASK_WAITING)
1827 child_task = next_task;
1828 cancelled
1829 = gomp_task_run_pre (child_task, task, team);
1830 if (__builtin_expect (cancelled, 0))
1832 if (to_free)
1834 gomp_finish_task (to_free);
1835 free (to_free);
1836 to_free = NULL;
1838 goto finish_cancelled;
1841 else
1842 /* All tasks we are waiting for are either running in other
1843 threads, or they are tasks that have not had their
1844 dependencies met (so they're not even in the queue). Wait
1845 for them. */
1846 taskwait.in_depend_wait = true;
1847 gomp_mutex_unlock (&team->task_lock);
1848 if (do_wake)
1850 gomp_team_barrier_wake (&team->barrier, do_wake);
1851 do_wake = 0;
1853 if (to_free)
1855 gomp_finish_task (to_free);
1856 free (to_free);
1857 to_free = NULL;
1859 if (child_task)
1861 thr->task = child_task;
1862 if (__builtin_expect (child_task->fn == NULL, 0))
1864 if (gomp_target_task_fn (child_task->fn_data))
1866 thr->task = task;
1867 gomp_mutex_lock (&team->task_lock);
1868 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1869 struct gomp_target_task *ttask
1870 = (struct gomp_target_task *) child_task->fn_data;
1871 /* If GOMP_PLUGIN_target_task_completion has run already
1872 in between gomp_target_task_fn and the mutex lock,
1873 perform the requeuing here. */
1874 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1875 gomp_target_task_completion (team, child_task);
1876 else
1877 ttask->state = GOMP_TARGET_TASK_RUNNING;
1878 child_task = NULL;
1879 continue;
1882 else
1883 child_task->fn (child_task->fn_data);
1884 thr->task = task;
1886 else
1887 gomp_sem_wait (&taskwait.taskwait_sem);
1888 gomp_mutex_lock (&team->task_lock);
1889 if (child_task)
1891 finish_cancelled:;
1892 size_t new_tasks
1893 = gomp_task_run_post_handle_depend (child_task, team);
1894 if (child_task->parent_depends_on)
1895 --taskwait.n_depend;
1897 priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1898 child_task, MEMMODEL_RELAXED);
1899 child_task->pnode[PQ_CHILDREN].next = NULL;
1900 child_task->pnode[PQ_CHILDREN].prev = NULL;
1902 gomp_clear_parent (&child_task->children_queue);
1903 gomp_task_run_post_remove_taskgroup (child_task);
1904 to_free = child_task;
1905 child_task = NULL;
1906 team->task_count--;
1907 if (new_tasks > 1)
1909 do_wake = team->nthreads - team->task_running_count
1910 - !task->in_tied_task;
1911 if (do_wake > new_tasks)
1912 do_wake = new_tasks;
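/* A minimal usage sketch (illustrative only, not part of this file):
   the dependency-wait loop above is reached when an undeferred task
   with depend clauses is encountered (and a similar path typically
   serves taskwait with depend clauses), so the encountering thread
   must first wait for the sibling tasks it depends on.  Inside a
   parallel region, assuming the usual includes:

     int x = 0;
     #pragma omp task shared(x) depend(out: x)
       x = 42;                              // deferred producer
     #pragma omp task if(0) shared(x) depend(in: x)
       printf ("%d\n", x);                  // undeferred consumer; waits
                                            // for the producer first
*/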
1918 /* Called when encountering a taskyield directive. */
1920 void
1921 GOMP_taskyield (void)
1923 /* Nothing at the moment. */
1926 static inline struct gomp_taskgroup *
1927 gomp_taskgroup_init (struct gomp_taskgroup *prev)
1929 struct gomp_taskgroup *taskgroup
1930 = gomp_malloc (sizeof (struct gomp_taskgroup));
1931 taskgroup->prev = prev;
1932 priority_queue_init (&taskgroup->taskgroup_queue);
1933 taskgroup->reductions = prev ? prev->reductions : NULL;
1934 taskgroup->in_taskgroup_wait = false;
1935 taskgroup->cancelled = false;
1936 taskgroup->workshare = false;
1937 taskgroup->num_children = 0;
1938 gomp_sem_init (&taskgroup->taskgroup_sem, 0);
1939 return taskgroup;
1942 void
1943 GOMP_taskgroup_start (void)
1945 struct gomp_thread *thr = gomp_thread ();
1946 struct gomp_team *team = thr->ts.team;
1947 struct gomp_task *task = thr->task;
1949 /* If team is NULL, all tasks are executed as
1950 GOMP_TASK_UNDEFERRED tasks and thus all child tasks of the
1951 taskgroup and their descendant tasks will be finished
1952 by the time GOMP_taskgroup_end is called. */
1953 if (team == NULL)
1954 return;
1955 task->taskgroup = gomp_taskgroup_init (task->taskgroup);
1958 void
1959 GOMP_taskgroup_end (void)
1961 struct gomp_thread *thr = gomp_thread ();
1962 struct gomp_team *team = thr->ts.team;
1963 struct gomp_task *task = thr->task;
1964 struct gomp_taskgroup *taskgroup;
1965 struct gomp_task *child_task = NULL;
1966 struct gomp_task *to_free = NULL;
1967 int do_wake = 0;
1969 if (team == NULL)
1970 return;
1971 taskgroup = task->taskgroup;
1972 if (__builtin_expect (taskgroup == NULL, 0)
1973 && thr->ts.level == 0)
1975 /* This can happen if GOMP_taskgroup_start is called when
1976 thr->ts.team == NULL, but inside the taskgroup there
1977 is a #pragma omp target nowait that creates an implicit
1978 team with a single thread. In this case, we want to wait
1979 for all outstanding tasks in this team. */
1980 gomp_team_barrier_wait (&team->barrier);
1981 return;
1984 /* The acquire barrier on load of taskgroup->num_children here
1985 synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
1986 It is not necessary that we synchronize with other non-0 writes at
1987 this point, but we must ensure that all writes to memory by a
1988 child thread task work function are seen before we exit from
1989 GOMP_taskgroup_end. */
1990 if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
1991 goto finish;
1993 bool unused;
1994 gomp_mutex_lock (&team->task_lock);
1995 while (1)
1997 bool cancelled = false;
1998 if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
1999 MEMMODEL_RELAXED))
2001 if (taskgroup->num_children)
2003 if (priority_queue_empty_p (&task->children_queue,
2004 MEMMODEL_RELAXED))
2005 goto do_wait;
2006 child_task
2007 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
2008 PQ_TEAM, &team->task_queue,
2009 &unused);
2011 else
2013 gomp_mutex_unlock (&team->task_lock);
2014 if (to_free)
2016 gomp_finish_task (to_free);
2017 free (to_free);
2019 goto finish;
2022 else
2023 child_task
2024 = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
2025 PQ_TEAM, &team->task_queue, &unused);
2026 if (child_task->kind == GOMP_TASK_WAITING)
2028 cancelled
2029 = gomp_task_run_pre (child_task, child_task->parent, team);
2030 if (__builtin_expect (cancelled, 0))
2032 if (to_free)
2034 gomp_finish_task (to_free);
2035 free (to_free);
2036 to_free = NULL;
2038 goto finish_cancelled;
2041 else
2043 child_task = NULL;
2044 do_wait:
2045 /* All tasks we are waiting for are either running in other
2046 threads, or they are tasks that have not had their
2047 dependencies met (so they're not even in the queue). Wait
2048 for them. */
2049 taskgroup->in_taskgroup_wait = true;
2051 gomp_mutex_unlock (&team->task_lock);
2052 if (do_wake)
2054 gomp_team_barrier_wake (&team->barrier, do_wake);
2055 do_wake = 0;
2057 if (to_free)
2059 gomp_finish_task (to_free);
2060 free (to_free);
2061 to_free = NULL;
2063 if (child_task)
2065 thr->task = child_task;
2066 if (__builtin_expect (child_task->fn == NULL, 0))
2068 if (gomp_target_task_fn (child_task->fn_data))
2070 thr->task = task;
2071 gomp_mutex_lock (&team->task_lock);
2072 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
2073 struct gomp_target_task *ttask
2074 = (struct gomp_target_task *) child_task->fn_data;
2075 /* If GOMP_PLUGIN_target_task_completion has run already
2076 in between gomp_target_task_fn and the mutex lock,
2077 perform the requeuing here. */
2078 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2079 gomp_target_task_completion (team, child_task);
2080 else
2081 ttask->state = GOMP_TARGET_TASK_RUNNING;
2082 child_task = NULL;
2083 continue;
2086 else
2087 child_task->fn (child_task->fn_data);
2088 thr->task = task;
2090 else
2091 gomp_sem_wait (&taskgroup->taskgroup_sem);
2092 gomp_mutex_lock (&team->task_lock);
2093 if (child_task)
2095 if (child_task->detach_team)
2097 assert (child_task->detach_team == team);
2098 child_task->kind = GOMP_TASK_DETACHED;
2099 ++team->task_detach_count;
2100 gomp_debug (0,
2101 "thread %d: task with event %p finished without "
2102 "completion event fulfilled in taskgroup\n",
2103 thr->ts.team_id, child_task);
2104 child_task = NULL;
2105 continue;
2108 finish_cancelled:;
2109 size_t new_tasks
2110 = gomp_task_run_post_handle_depend (child_task, team);
2111 gomp_task_run_post_remove_parent (child_task);
2112 gomp_clear_parent (&child_task->children_queue);
2113 gomp_task_run_post_remove_taskgroup (child_task);
2114 to_free = child_task;
2115 child_task = NULL;
2116 team->task_count--;
2117 if (new_tasks > 1)
2119 do_wake = team->nthreads - team->task_running_count
2120 - !task->in_tied_task;
2121 if (do_wake > new_tasks)
2122 do_wake = new_tasks;
2127 finish:
2128 task->taskgroup = taskgroup->prev;
2129 gomp_sem_destroy (&taskgroup->taskgroup_sem);
2130 free (taskgroup);
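/* A rough sketch (illustrative only) of how a taskgroup region maps
   onto the two entry points above; the exact lowering is up to the
   compiler, but it is approximately:

     #pragma omp taskgroup
     {
       #pragma omp task
         do_work ();                  // hypothetical task body
     }

   which behaves like:

     GOMP_taskgroup_start ();
     // ...create the child task via GOMP_task...
     GOMP_taskgroup_end ();           // waits for all tasks in the group,
                                      // including their descendants
*/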
2133 static inline __attribute__((always_inline)) void
2134 gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig,
2135 unsigned nthreads)
2137 size_t total_cnt = 0;
2138 uintptr_t *d = data;
2139 struct htab *old_htab = NULL, *new_htab;
2142 if (__builtin_expect (orig != NULL, 0))
2144 /* For worksharing task reductions, memory has been allocated
2145 already by some other thread that encountered the construct
2146 earlier. */
2147 d[2] = orig[2];
2148 d[6] = orig[6];
2149 orig = (uintptr_t *) orig[4];
2151 else
2153 size_t sz = d[1] * nthreads;
2154 /* Should use omp_alloc if d[3] is not -1. */
2155 void *ptr = gomp_aligned_alloc (d[2], sz);
2156 memset (ptr, '\0', sz);
2157 d[2] = (uintptr_t) ptr;
2158 d[6] = d[2] + sz;
2160 d[5] = 0;
2161 total_cnt += d[0];
2162 if (d[4] == 0)
2164 d[4] = (uintptr_t) old;
2165 break;
2167 else
2168 d = (uintptr_t *) d[4];
2170 while (1);
2171 if (old && old[5])
2173 old_htab = (struct htab *) old[5];
2174 total_cnt += htab_elements (old_htab);
2176 new_htab = htab_create (total_cnt);
2177 if (old_htab)
2179 /* Copy old hash table, like in htab_expand. */
2180 hash_entry_type *p, *olimit;
2181 new_htab->n_elements = htab_elements (old_htab);
2182 olimit = old_htab->entries + old_htab->size;
2183 p = old_htab->entries;
2186 hash_entry_type x = *p;
2187 if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
2188 *find_empty_slot_for_expand (new_htab, htab_hash (x)) = x;
2189 p++;
2191 while (p < olimit);
2193 d = data;
2196 size_t j;
2197 for (j = 0; j < d[0]; ++j)
2199 uintptr_t *p = d + 7 + j * 3;
2200 p[2] = (uintptr_t) d;
2201 /* Ugly hack: hash_entry_type is defined for the task dependencies,
2202 which hash on their first element, a pointer. Here we likewise
2203 need to hash on the first sizeof (uintptr_t) bytes, which also
2204 contain a pointer. Hide the cast from the compiler. */
2205 hash_entry_type n;
2206 __asm ("" : "=g" (n) : "0" (p));
2207 *htab_find_slot (&new_htab, n, INSERT) = n;
2209 if (d[4] == (uintptr_t) old)
2210 break;
2211 else
2212 d = (uintptr_t *) d[4];
2214 while (1);
2215 d[5] = (uintptr_t) new_htab;
2218 static void
2219 gomp_create_artificial_team (void)
2221 struct gomp_thread *thr = gomp_thread ();
2222 struct gomp_task_icv *icv;
2223 struct gomp_team *team = gomp_new_team (1);
2224 struct gomp_task *task = thr->task;
2225 icv = task ? &task->icv : &gomp_global_icv;
2226 team->prev_ts = thr->ts;
2227 thr->ts.team = team;
2228 thr->ts.team_id = 0;
2229 thr->ts.work_share = &team->work_shares[0];
2230 thr->ts.last_work_share = NULL;
2231 #ifdef HAVE_SYNC_BUILTINS
2232 thr->ts.single_count = 0;
2233 #endif
2234 thr->ts.static_trip = 0;
2235 thr->task = &team->implicit_task[0];
2236 gomp_init_task (thr->task, NULL, icv);
2237 if (task)
2239 thr->task = task;
2240 gomp_end_task ();
2241 free (task);
2242 thr->task = &team->implicit_task[0];
2244 #ifdef LIBGOMP_USE_PTHREADS
2245 else
2246 pthread_setspecific (gomp_thread_destructor, thr);
2247 #endif
2250 /* The format of data is:
2251 data[0] cnt
2252 data[1] size
2253 data[2] alignment (on output array pointer)
2254 data[3] allocator (-1 if malloc allocator)
2255 data[4] next pointer
2256 data[5] used internally (htab pointer)
2257 data[6] used internally (end of array)
2258 cnt times
2259 ent[0] address
2260 ent[1] offset
2261 ent[2] used internally (pointer to data[0])
2262 The entries are sorted by increasing offset, so that a binary
2263 search can be performed. Normally, data[8] is 0; the exception
2264 is worksharing construct task reductions in cancellable parallel,
2265 where at offset 0 there should be space for a pointer and an integer
2266 which are used internally. */
2268 void
2269 GOMP_taskgroup_reduction_register (uintptr_t *data)
2271 struct gomp_thread *thr = gomp_thread ();
2272 struct gomp_team *team = thr->ts.team;
2273 struct gomp_task *task;
2274 unsigned nthreads;
2275 if (__builtin_expect (team == NULL, 0))
2277 /* The task reduction code needs a team and task, so for
2278 orphaned taskgroups just create the implicit team. */
2279 gomp_create_artificial_team ();
2280 ialias_call (GOMP_taskgroup_start) ();
2281 team = thr->ts.team;
2283 nthreads = team->nthreads;
2284 task = thr->task;
2285 gomp_reduction_register (data, task->taskgroup->reductions, NULL, nthreads);
2286 task->taskgroup->reductions = data;
2289 void
2290 GOMP_taskgroup_reduction_unregister (uintptr_t *data)
2292 uintptr_t *d = data;
2293 htab_free ((struct htab *) data[5]);
2296 gomp_aligned_free ((void *) d[2]);
2297 d = (uintptr_t *) d[4];
2299 while (d && !d[5]);
2301 ialias (GOMP_taskgroup_reduction_unregister)
2303 /* For i = 0 to cnt-1, remap ptrs[i] which is either address of the
2304 original list item or address of previously remapped original list
2305 item to address of the private copy, store that to ptrs[i].
2306 For i < cntorig, additionally set ptrs[cnt+i] to the address of
2307 the original list item. */
2309 void
2310 GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs)
2312 struct gomp_thread *thr = gomp_thread ();
2313 struct gomp_task *task = thr->task;
2314 unsigned id = thr->ts.team_id;
2315 uintptr_t *data = task->taskgroup->reductions;
2316 uintptr_t *d;
2317 struct htab *reduction_htab = (struct htab *) data[5];
2318 size_t i;
2319 for (i = 0; i < cnt; ++i)
2321 hash_entry_type ent, n;
2322 __asm ("" : "=g" (ent) : "0" (ptrs + i));
2323 n = htab_find (reduction_htab, ent);
2324 if (n)
2326 uintptr_t *p;
2327 __asm ("" : "=g" (p) : "0" (n));
2328 /* At this point, p[0] should be equal to (uintptr_t) ptrs[i],
2329 p[1] is the offset within the allocated chunk for each
2330 thread, p[2] is the array registered with
2331 GOMP_taskgroup_reduction_register, d[2] is the base of the
2332 allocated memory and d[1] is the size of the allocated chunk
2333 for one thread. */
2334 d = (uintptr_t *) p[2];
2335 ptrs[i] = (void *) (d[2] + id * d[1] + p[1]);
2336 if (__builtin_expect (i < cntorig, 0))
2337 ptrs[cnt + i] = (void *) p[0];
2338 continue;
2340 d = data;
2341 while (d != NULL)
2343 if ((uintptr_t) ptrs[i] >= d[2] && (uintptr_t) ptrs[i] < d[6])
2344 break;
2345 d = (uintptr_t *) d[4];
2347 if (d == NULL)
2348 gomp_fatal ("couldn't find matching task_reduction or reduction with "
2349 "task modifier for %p", ptrs[i]);
2350 uintptr_t off = ((uintptr_t) ptrs[i] - d[2]) % d[1];
2351 ptrs[i] = (void *) (d[2] + id * d[1] + off);
2352 if (__builtin_expect (i < cntorig, 0))
2354 size_t lo = 0, hi = d[0] - 1;
2355 while (lo <= hi)
2357 size_t m = (lo + hi) / 2;
2358 if (d[7 + 3 * m + 1] < off)
2359 lo = m + 1;
2360 else if (d[7 + 3 * m + 1] == off)
2362 ptrs[cnt + i] = (void *) d[7 + 3 * m];
2363 break;
2365 else
2366 hi = m - 1;
2368 if (lo > hi)
2369 gomp_fatal ("couldn't find matching task_reduction or reduction "
2370 "with task modifier for %p", ptrs[i]);
2375 struct gomp_taskgroup *
2376 gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads)
2378 struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL);
2379 gomp_reduction_register (data, NULL, NULL, nthreads);
2380 taskgroup->reductions = data;
2381 return taskgroup;
2384 void
2385 gomp_workshare_task_reduction_register (uintptr_t *data, uintptr_t *orig)
2387 struct gomp_thread *thr = gomp_thread ();
2388 struct gomp_team *team = thr->ts.team;
2389 struct gomp_task *task = thr->task;
2390 unsigned nthreads = team->nthreads;
2391 gomp_reduction_register (data, task->taskgroup->reductions, orig, nthreads);
2392 task->taskgroup->reductions = data;
2395 void
2396 gomp_workshare_taskgroup_start (void)
2398 struct gomp_thread *thr = gomp_thread ();
2399 struct gomp_team *team = thr->ts.team;
2400 struct gomp_task *task;
2402 if (team == NULL)
2404 gomp_create_artificial_team ();
2405 team = thr->ts.team;
2407 task = thr->task;
2408 task->taskgroup = gomp_taskgroup_init (task->taskgroup);
2409 task->taskgroup->workshare = true;
2412 void
2413 GOMP_workshare_task_reduction_unregister (bool cancelled)
2415 struct gomp_thread *thr = gomp_thread ();
2416 struct gomp_task *task = thr->task;
2417 struct gomp_team *team = thr->ts.team;
2418 uintptr_t *data = task->taskgroup->reductions;
2419 ialias_call (GOMP_taskgroup_end) ();
2420 if (thr->ts.team_id == 0)
2421 ialias_call (GOMP_taskgroup_reduction_unregister) (data);
2422 else
2423 htab_free ((struct htab *) data[5]);
2425 if (!cancelled)
2426 gomp_team_barrier_wait (&team->barrier);
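/* A brief sketch (illustrative only): worksharing-construct task
   reductions, i.e. a reduction with the task modifier on a
   worksharing loop, are what gomp_workshare_taskgroup_start,
   gomp_workshare_task_reduction_register and the unregister entry
   point above serve.  Same in_reduction tasks as in the previous
   sketch, but the reduction is declared on the loop itself
   (n and value () again hypothetical):

     long sum = 0;
     #pragma omp parallel
     #pragma omp for reduction(task, +: sum)
     for (int i = 0; i < n; i++)
       {
         #pragma omp task in_reduction(+: sum)
           sum += value (i);
       }
*/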
2429 int
2430 omp_in_final (void)
2432 struct gomp_thread *thr = gomp_thread ();
2433 return thr->task && thr->task->final_task;
2436 ialias (omp_in_final)
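/* A minimal usage sketch (illustrative only): omp_in_final () reports
   whether the calling task is final, i.e. whether any further tasks
   it creates will run undeferred; user code typically uses it to
   switch to a serial leaf case.  depth, sub, solve_serial and
   solve_parallel are hypothetical:

     #pragma omp task final(depth > 16) mergeable
     {
       if (omp_in_final ())
         solve_serial (sub);                  // serial fallback
       else
         solve_parallel (sub, depth + 1);     // spawns more tasks
     }
*/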
2438 void
2439 omp_fulfill_event (omp_event_handle_t event)
2441 struct gomp_task *task = (struct gomp_task *) event;
2442 if (!task->deferred_p)
2444 if (gomp_sem_getcount (task->completion_sem) > 0)
2445 gomp_fatal ("omp_fulfill_event: %p event already fulfilled!\n", task);
2447 gomp_debug (0, "omp_fulfill_event: %p event for undeferred task\n",
2448 task);
2449 gomp_sem_post (task->completion_sem);
2450 return;
2453 struct gomp_team *team = __atomic_load_n (&task->detach_team,
2454 MEMMODEL_RELAXED);
2455 if (!team)
2456 gomp_fatal ("omp_fulfill_event: %p event is invalid or has already "
2457 "been fulfilled!\n", task);
2459 gomp_mutex_lock (&team->task_lock);
2460 if (task->kind != GOMP_TASK_DETACHED)
2462 /* The task has not finished running yet. */
2463 gomp_debug (0,
2464 "omp_fulfill_event: %p event fulfilled for unfinished "
2465 "task\n", task);
2466 __atomic_store_n (&task->detach_team, NULL, MEMMODEL_RELAXED);
2467 gomp_mutex_unlock (&team->task_lock);
2468 return;
2471 gomp_debug (0, "omp_fulfill_event: %p event fulfilled for finished task\n",
2472 task);
2473 size_t new_tasks = gomp_task_run_post_handle_depend (task, team);
2474 gomp_task_run_post_remove_parent (task);
2475 gomp_clear_parent (&task->children_queue);
2476 gomp_task_run_post_remove_taskgroup (task);
2477 team->task_count--;
2478 team->task_detach_count--;
2480 int do_wake = 0;
2481 bool shackled_thread_p = team == gomp_thread ()->ts.team;
2482 if (new_tasks > 0)
2484 /* Wake up threads to run new tasks. */
2485 gomp_team_barrier_set_task_pending (&team->barrier);
2486 do_wake = team->nthreads - team->task_running_count;
2487 if (do_wake > new_tasks)
2488 do_wake = new_tasks;
2491 if (!shackled_thread_p
2492 && !do_wake
2493 && team->task_detach_count == 0
2494 && gomp_team_barrier_waiting_for_tasks (&team->barrier))
2495 /* Ensure that at least one thread is woken up to signal that the
2496 barrier can finish. */
2497 do_wake = 1;
2499 /* If we are running in an unshackled thread, the team might vanish before
2500 gomp_team_barrier_wake is run if we release the lock first, so keep the
2501 lock for the call in that case. */
2502 if (shackled_thread_p)
2503 gomp_mutex_unlock (&team->task_lock);
2504 if (do_wake)
2505 gomp_team_barrier_wake (&team->barrier, do_wake);
2506 if (!shackled_thread_p)
2507 gomp_mutex_unlock (&team->task_lock);
2509 gomp_finish_task (task);
2510 free (task);
2513 ialias (omp_fulfill_event)
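/* A minimal usage sketch (illustrative only) of the detach clause and
   omp_fulfill_event: a detached task does not complete when its body
   returns, but only once its allow-completion event is fulfilled, so
   the taskwait below also waits for the asynchronous operation.  The
   asynchronous I/O API (start_async_read, fd, buf, len) and the
   completion callback done_cb are hypothetical:

     void done_cb (void *data)
     {
       omp_fulfill_event ((omp_event_handle_t) (uintptr_t) data);
     }

     // ...inside a parallel region...
     omp_event_handle_t ev;
     #pragma omp task detach(ev)
       start_async_read (fd, buf, len, done_cb, (void *) (uintptr_t) ev);
     #pragma omp taskwait      // returns only once done_cb has fulfilled ev
*/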