1 /* Copyright (C) 2007-2021 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file handles the maintenance of tasks in response to task
27 creation and termination. */
33 #include "gomp-constants.h"
35 typedef struct gomp_task_depend_entry
*hash_entry_type
;
38 htab_alloc (size_t size
)
40 return gomp_malloc (size
);
51 static inline hashval_t
52 htab_hash (hash_entry_type element
)
54 return hash_pointer (element
->addr
);
58 htab_eq (hash_entry_type x
, hash_entry_type y
)
60 return x
->addr
== y
->addr
;
63 /* Create a new task data structure. */
66 gomp_init_task (struct gomp_task
*task
, struct gomp_task
*parent_task
,
67 struct gomp_task_icv
*prev_icv
)
69 /* It would seem that using memset here would be a win, but it turns
70 out that partially filling gomp_task allows us to keep the
71 overhead of task creation low. In the nqueens-1.c test, for a
72 sufficiently large N, we drop the overhead from 5-6% to 1%.
74 Note, the nqueens-1.c test in serial mode is a good test to
75 benchmark the overhead of creating tasks as there are millions of
76 tiny tasks created that all run undeferred. */
77 task
->parent
= parent_task
;
78 priority_queue_init (&task
->children_queue
);
79 task
->taskgroup
= NULL
;
80 task
->dependers
= NULL
;
81 task
->depend_hash
= NULL
;
82 task
->taskwait
= NULL
;
83 task
->depend_count
= 0;
84 task
->completion_sem
= NULL
;
85 task
->deferred_p
= false;
86 task
->icv
= *prev_icv
;
87 task
->kind
= GOMP_TASK_IMPLICIT
;
88 task
->in_tied_task
= false;
89 task
->final_task
= false;
90 task
->copy_ctors_done
= false;
91 task
->parent_depends_on
= false;
94 /* Clean up a task, after completing it. */
99 struct gomp_thread
*thr
= gomp_thread ();
100 struct gomp_task
*task
= thr
->task
;
102 gomp_finish_task (task
);
103 thr
->task
= task
->parent
;
106 /* Clear the parent field of every task in LIST. */
109 gomp_clear_parent_in_list (struct priority_list
*list
)
111 struct priority_node
*p
= list
->tasks
;
115 priority_node_to_task (PQ_CHILDREN
, p
)->parent
= NULL
;
118 while (p
!= list
->tasks
);
121 /* Splay tree version of gomp_clear_parent_in_list.
123 Clear the parent field of every task in NODE within SP, and free
124 the node when done. */
127 gomp_clear_parent_in_tree (prio_splay_tree sp
, prio_splay_tree_node node
)
131 prio_splay_tree_node left
= node
->left
, right
= node
->right
;
132 gomp_clear_parent_in_list (&node
->key
.l
);
133 #if _LIBGOMP_CHECKING_
134 memset (node
, 0xaf, sizeof (*node
));
136 /* No need to remove the node from the tree. We're nuking
137 everything, so just free the nodes and our caller can clear the
138 entire splay tree. */
140 gomp_clear_parent_in_tree (sp
, left
);
141 gomp_clear_parent_in_tree (sp
, right
);
144 /* Clear the parent field of every task in Q and remove every task
148 gomp_clear_parent (struct priority_queue
*q
)
150 if (priority_queue_multi_p (q
))
152 gomp_clear_parent_in_tree (&q
->t
, q
->t
.root
);
153 /* All the nodes have been cleared in gomp_clear_parent_in_tree.
154 No need to remove anything. We can just nuke everything. */
158 gomp_clear_parent_in_list (&q
->l
);
161 /* Helper function for GOMP_task and gomp_create_target_task.
163 For a TASK with in/out dependencies, fill in the various dependency
164 queues. PARENT is the parent of said task. DEPEND is as in
168 gomp_task_handle_depend (struct gomp_task
*task
, struct gomp_task
*parent
,
171 size_t ndepend
= (uintptr_t) depend
[0];
177 /* depend[0] is total # */
178 size_t nout
= (uintptr_t) depend
[1]; /* # of out: and inout: */
179 /* ndepend - nout is # of in: */
180 for (i
= 0; i
< ndepend
; i
++)
182 task
->depend
[i
].addr
= depend
[2 + i
];
183 task
->depend
[i
].is_in
= i
>= nout
;
188 ndepend
= (uintptr_t) depend
[1]; /* total # */
189 size_t nout
= (uintptr_t) depend
[2]; /* # of out: and inout: */
190 size_t nmutexinoutset
= (uintptr_t) depend
[3]; /* # of mutexinoutset: */
191 /* For now we treat mutexinoutset like out, which is compliant, but
193 size_t nin
= (uintptr_t) depend
[4]; /* # of in: */
194 /* ndepend - nout - nmutexinoutset - nin is # of depobjs */
195 size_t normal
= nout
+ nmutexinoutset
+ nin
;
197 for (i
= normal
; i
< ndepend
; i
++)
199 void **d
= (void **) (uintptr_t) depend
[5 + i
];
200 switch ((uintptr_t) d
[1])
202 case GOMP_DEPEND_OUT
:
203 case GOMP_DEPEND_INOUT
:
204 case GOMP_DEPEND_MUTEXINOUTSET
:
209 gomp_fatal ("unknown omp_depend_t dependence type %d",
210 (int) (uintptr_t) d
[1]);
212 task
->depend
[n
].addr
= d
[0];
213 task
->depend
[n
++].is_in
= 0;
215 for (i
= 0; i
< normal
; i
++)
217 task
->depend
[n
].addr
= depend
[5 + i
];
218 task
->depend
[n
++].is_in
= i
>= nout
+ nmutexinoutset
;
220 for (i
= normal
; i
< ndepend
; i
++)
222 void **d
= (void **) (uintptr_t) depend
[5 + i
];
223 if ((uintptr_t) d
[1] != GOMP_DEPEND_IN
)
225 task
->depend
[n
].addr
= d
[0];
226 task
->depend
[n
++].is_in
= 1;
229 task
->depend_count
= ndepend
;
230 task
->num_dependees
= 0;
231 if (parent
->depend_hash
== NULL
)
232 parent
->depend_hash
= htab_create (2 * ndepend
> 12 ? 2 * ndepend
: 12);
233 for (i
= 0; i
< ndepend
; i
++)
235 task
->depend
[i
].next
= NULL
;
236 task
->depend
[i
].prev
= NULL
;
237 task
->depend
[i
].task
= task
;
238 task
->depend
[i
].redundant
= false;
239 task
->depend
[i
].redundant_out
= false;
241 hash_entry_type
*slot
= htab_find_slot (&parent
->depend_hash
,
242 &task
->depend
[i
], INSERT
);
243 hash_entry_type out
= NULL
, last
= NULL
;
246 /* If multiple depends on the same task are the same, all but the
247 first one are redundant. As inout/out come first, if any of them
248 is inout/out, it will win, which is the right semantics. */
249 if ((*slot
)->task
== task
)
251 task
->depend
[i
].redundant
= true;
254 for (ent
= *slot
; ent
; ent
= ent
->next
)
256 if (ent
->redundant_out
)
261 /* depend(in:...) doesn't depend on earlier depend(in:...). */
262 if (task
->depend
[i
].is_in
&& ent
->is_in
)
268 struct gomp_task
*tsk
= ent
->task
;
269 if (tsk
->dependers
== NULL
)
272 = gomp_malloc (sizeof (struct gomp_dependers_vec
)
273 + 6 * sizeof (struct gomp_task
*));
274 tsk
->dependers
->n_elem
= 1;
275 tsk
->dependers
->allocated
= 6;
276 tsk
->dependers
->elem
[0] = task
;
277 task
->num_dependees
++;
280 /* We already have some other dependency on tsk from earlier
282 else if (tsk
->dependers
->n_elem
283 && (tsk
->dependers
->elem
[tsk
->dependers
->n_elem
- 1]
286 else if (tsk
->dependers
->n_elem
== tsk
->dependers
->allocated
)
288 tsk
->dependers
->allocated
289 = tsk
->dependers
->allocated
* 2 + 2;
291 = gomp_realloc (tsk
->dependers
,
292 sizeof (struct gomp_dependers_vec
)
293 + (tsk
->dependers
->allocated
294 * sizeof (struct gomp_task
*)));
296 tsk
->dependers
->elem
[tsk
->dependers
->n_elem
++] = task
;
297 task
->num_dependees
++;
299 task
->depend
[i
].next
= *slot
;
300 (*slot
)->prev
= &task
->depend
[i
];
302 *slot
= &task
->depend
[i
];
304 /* There is no need to store more than one depend({,in}out:) task per
305 address in the hash table chain for the purpose of creation of
306 deferred tasks, because each out depends on all earlier outs, thus it
307 is enough to record just the last depend({,in}out:). For depend(in:),
308 we need to keep all of the previous ones not terminated yet, because
309 a later depend({,in}out:) might need to depend on all of them. So, if
310 the new task's clause is depend({,in}out:), we know there is at most
311 one other depend({,in}out:) clause in the list (out). For
312 non-deferred tasks we want to see all outs, so they are moved to the
313 end of the chain, after first redundant_out entry all following
314 entries should be redundant_out. */
315 if (!task
->depend
[i
].is_in
&& out
)
319 out
->next
->prev
= out
->prev
;
320 out
->prev
->next
= out
->next
;
321 out
->next
= last
->next
;
325 out
->next
->prev
= out
;
327 out
->redundant_out
= true;
332 /* Called when encountering an explicit task directive. If IF_CLAUSE is
333 false, then we must not delay in executing the task. If UNTIED is true,
334 then the task may be executed by any member of the team.
336 DEPEND is an array containing:
337 if depend[0] is non-zero, then:
338 depend[0]: number of depend elements.
339 depend[1]: number of depend elements of type "out/inout".
340 depend[2..N+1]: address of [1..N]th depend element.
341 otherwise, when depend[0] is zero, then:
342 depend[1]: number of depend elements.
343 depend[2]: number of depend elements of type "out/inout".
344 depend[3]: number of depend elements of type "mutexinoutset".
345 depend[4]: number of depend elements of type "in".
346 depend[5..4+depend[2]+depend[3]+depend[4]]: address of depend elements
347 depend[5+depend[2]+depend[3]+depend[4]..4+depend[1]]: address of
348 omp_depend_t objects. */
351 GOMP_task (void (*fn
) (void *), void *data
, void (*cpyfn
) (void *, void *),
352 long arg_size
, long arg_align
, bool if_clause
, unsigned flags
,
353 void **depend
, int priority_arg
, void *detach
)
355 struct gomp_thread
*thr
= gomp_thread ();
356 struct gomp_team
*team
= thr
->ts
.team
;
359 #ifdef HAVE_BROKEN_POSIX_SEMAPHORES
360 /* If pthread_mutex_* is used for omp_*lock*, then each task must be
361 tied to one thread all the time. This means UNTIED tasks must be
362 tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
363 might be running on different thread than FN. */
366 flags
&= ~GOMP_TASK_FLAG_UNTIED
;
369 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
370 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
372 if (gomp_team_barrier_cancelled (&team
->barrier
))
374 if (thr
->task
->taskgroup
)
376 if (thr
->task
->taskgroup
->cancelled
)
378 if (thr
->task
->taskgroup
->workshare
379 && thr
->task
->taskgroup
->prev
380 && thr
->task
->taskgroup
->prev
->cancelled
)
385 if (__builtin_expect ((flags
& GOMP_TASK_FLAG_PRIORITY
) != 0, 0))
387 priority
= priority_arg
;
388 if (priority
> gomp_max_task_priority_var
)
389 priority
= gomp_max_task_priority_var
;
392 if (!if_clause
|| team
== NULL
393 || (thr
->task
&& thr
->task
->final_task
)
394 || team
->task_count
> 64 * team
->nthreads
)
396 struct gomp_task task
;
397 gomp_sem_t completion_sem
;
399 /* If there are depend clauses and earlier deferred sibling tasks
400 with depend clauses, check if there isn't a dependency. If there
401 is, we need to wait for them. There is no need to handle
402 depend clauses for non-deferred tasks other than this, because
403 the parent task is suspended until the child task finishes and thus
404 it can't start further child tasks. */
405 if ((flags
& GOMP_TASK_FLAG_DEPEND
)
406 && thr
->task
&& thr
->task
->depend_hash
)
407 gomp_task_maybe_wait_for_dependencies (depend
);
409 gomp_init_task (&task
, thr
->task
, gomp_icv (false));
410 task
.kind
= GOMP_TASK_UNDEFERRED
;
411 task
.final_task
= (thr
->task
&& thr
->task
->final_task
)
412 || (flags
& GOMP_TASK_FLAG_FINAL
);
413 task
.priority
= priority
;
415 if ((flags
& GOMP_TASK_FLAG_DETACH
) != 0)
417 gomp_sem_init (&completion_sem
, 0);
418 task
.completion_sem
= &completion_sem
;
419 *(void **) detach
= &task
;
421 *(void **) data
= &task
;
423 gomp_debug (0, "Thread %d: new event: %p\n",
424 thr
->ts
.team_id
, &task
);
429 task
.in_tied_task
= thr
->task
->in_tied_task
;
430 task
.taskgroup
= thr
->task
->taskgroup
;
433 if (__builtin_expect (cpyfn
!= NULL
, 0))
435 char buf
[arg_size
+ arg_align
- 1];
436 char *arg
= (char *) (((uintptr_t) buf
+ arg_align
- 1)
437 & ~(uintptr_t) (arg_align
- 1));
444 if ((flags
& GOMP_TASK_FLAG_DETACH
) != 0)
446 gomp_sem_wait (&completion_sem
);
447 gomp_sem_destroy (&completion_sem
);
450 /* Access to "children" is normally done inside a task_lock
451 mutex region, but the only way this particular task.children
452 can be set is if this thread's task work function (fn)
453 creates children. So since the setter is *this* thread, we
454 need no barriers here when testing for non-NULL. We can have
455 task.children set by the current thread then changed by a
456 child thread, but seeing a stale non-NULL value is not a
457 problem. Once past the task_lock acquisition, this thread
458 will see the real value of task.children. */
459 if (!priority_queue_empty_p (&task
.children_queue
, MEMMODEL_RELAXED
))
461 gomp_mutex_lock (&team
->task_lock
);
462 gomp_clear_parent (&task
.children_queue
);
463 gomp_mutex_unlock (&team
->task_lock
);
469 struct gomp_task
*task
;
470 struct gomp_task
*parent
= thr
->task
;
471 struct gomp_taskgroup
*taskgroup
= parent
->taskgroup
;
474 size_t depend_size
= 0;
476 if (flags
& GOMP_TASK_FLAG_DEPEND
)
477 depend_size
= ((uintptr_t) (depend
[0] ? depend
[0] : depend
[1])
478 * sizeof (struct gomp_task_depend_entry
));
479 task
= gomp_malloc (sizeof (*task
) + depend_size
480 + arg_size
+ arg_align
- 1);
481 arg
= (char *) (((uintptr_t) (task
+ 1) + depend_size
+ arg_align
- 1)
482 & ~(uintptr_t) (arg_align
- 1));
483 gomp_init_task (task
, parent
, gomp_icv (false));
484 task
->priority
= priority
;
485 task
->kind
= GOMP_TASK_UNDEFERRED
;
486 task
->in_tied_task
= parent
->in_tied_task
;
487 task
->taskgroup
= taskgroup
;
488 task
->deferred_p
= true;
489 if ((flags
& GOMP_TASK_FLAG_DETACH
) != 0)
491 task
->detach_team
= team
;
493 *(void **) detach
= task
;
495 *(void **) data
= task
;
497 gomp_debug (0, "Thread %d: new event: %p\n", thr
->ts
.team_id
, task
);
503 task
->copy_ctors_done
= true;
506 memcpy (arg
, data
, arg_size
);
508 task
->kind
= GOMP_TASK_WAITING
;
511 task
->final_task
= (flags
& GOMP_TASK_FLAG_FINAL
) >> 1;
512 gomp_mutex_lock (&team
->task_lock
);
513 /* If parallel or taskgroup has been cancelled, don't start new
515 if (__builtin_expect (gomp_cancel_var
, 0)
516 && !task
->copy_ctors_done
)
518 if (gomp_team_barrier_cancelled (&team
->barrier
))
521 gomp_mutex_unlock (&team
->task_lock
);
522 gomp_finish_task (task
);
528 if (taskgroup
->cancelled
)
530 if (taskgroup
->workshare
532 && taskgroup
->prev
->cancelled
)
537 taskgroup
->num_children
++;
540 gomp_task_handle_depend (task
, parent
, depend
);
541 if (task
->num_dependees
)
543 /* Tasks that depend on other tasks are not put into the
544 various waiting queues, so we are done for now. Said
545 tasks are instead put into the queues via
546 gomp_task_run_post_handle_dependers() after their
547 dependencies have been satisfied. After which, they
548 can be picked up by the various scheduling
550 gomp_mutex_unlock (&team
->task_lock
);
555 priority_queue_insert (PQ_CHILDREN
, &parent
->children_queue
,
557 PRIORITY_INSERT_BEGIN
,
558 /*adjust_parent_depends_on=*/false,
559 task
->parent_depends_on
);
561 priority_queue_insert (PQ_TASKGROUP
, &taskgroup
->taskgroup_queue
,
563 PRIORITY_INSERT_BEGIN
,
564 /*adjust_parent_depends_on=*/false,
565 task
->parent_depends_on
);
567 priority_queue_insert (PQ_TEAM
, &team
->task_queue
,
570 /*adjust_parent_depends_on=*/false,
571 task
->parent_depends_on
);
574 ++team
->task_queued_count
;
575 gomp_team_barrier_set_task_pending (&team
->barrier
);
576 do_wake
= team
->task_running_count
+ !parent
->in_tied_task
578 gomp_mutex_unlock (&team
->task_lock
);
580 gomp_team_barrier_wake (&team
->barrier
, 1);
584 ialias (GOMP_taskgroup_start
)
585 ialias (GOMP_taskgroup_end
)
586 ialias (GOMP_taskgroup_reduction_register
)
589 #define UTYPE unsigned long
590 #define TYPE_is_long 1
591 #include "taskloop.c"
596 #define TYPE unsigned long long
598 #define GOMP_taskloop GOMP_taskloop_ull
599 #include "taskloop.c"
605 priority_queue_move_task_first (enum priority_queue_type type
,
606 struct priority_queue
*head
,
607 struct gomp_task
*task
)
609 #if _LIBGOMP_CHECKING_
610 if (!priority_queue_task_in_queue_p (type
, head
, task
))
611 gomp_fatal ("Attempt to move first missing task %p", task
);
613 struct priority_list
*list
;
614 if (priority_queue_multi_p (head
))
616 list
= priority_queue_lookup_priority (head
, task
->priority
);
617 #if _LIBGOMP_CHECKING_
619 gomp_fatal ("Unable to find priority %d", task
->priority
);
624 priority_list_remove (list
, task_to_priority_node (type
, task
), 0);
625 priority_list_insert (type
, list
, task
, task
->priority
,
626 PRIORITY_INSERT_BEGIN
, type
== PQ_CHILDREN
,
627 task
->parent_depends_on
);
630 /* Actual body of GOMP_PLUGIN_target_task_completion that is executed
631 with team->task_lock held, or is executed in the thread that called
632 gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
633 run before it acquires team->task_lock. */
636 gomp_target_task_completion (struct gomp_team
*team
, struct gomp_task
*task
)
638 struct gomp_task
*parent
= task
->parent
;
640 priority_queue_move_task_first (PQ_CHILDREN
, &parent
->children_queue
,
643 struct gomp_taskgroup
*taskgroup
= task
->taskgroup
;
645 priority_queue_move_task_first (PQ_TASKGROUP
, &taskgroup
->taskgroup_queue
,
648 priority_queue_insert (PQ_TEAM
, &team
->task_queue
, task
, task
->priority
,
649 PRIORITY_INSERT_BEGIN
, false,
650 task
->parent_depends_on
);
651 task
->kind
= GOMP_TASK_WAITING
;
652 if (parent
&& parent
->taskwait
)
654 if (parent
->taskwait
->in_taskwait
)
656 /* One more task has had its dependencies met.
657 Inform any waiters. */
658 parent
->taskwait
->in_taskwait
= false;
659 gomp_sem_post (&parent
->taskwait
->taskwait_sem
);
661 else if (parent
->taskwait
->in_depend_wait
)
663 /* One more task has had its dependencies met.
664 Inform any waiters. */
665 parent
->taskwait
->in_depend_wait
= false;
666 gomp_sem_post (&parent
->taskwait
->taskwait_sem
);
669 if (taskgroup
&& taskgroup
->in_taskgroup_wait
)
671 /* One more task has had its dependencies met.
672 Inform any waiters. */
673 taskgroup
->in_taskgroup_wait
= false;
674 gomp_sem_post (&taskgroup
->taskgroup_sem
);
677 ++team
->task_queued_count
;
678 gomp_team_barrier_set_task_pending (&team
->barrier
);
679 /* I'm afraid this can't be done after releasing team->task_lock,
680 as gomp_target_task_completion is run from unrelated thread and
681 therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
682 the team could be gone already. */
683 if (team
->nthreads
> team
->task_running_count
)
684 gomp_team_barrier_wake (&team
->barrier
, 1);
687 /* Signal that a target task TTASK has completed the asynchronously
688 running phase and should be requeued as a task to handle the
689 variable unmapping. */
692 GOMP_PLUGIN_target_task_completion (void *data
)
694 struct gomp_target_task
*ttask
= (struct gomp_target_task
*) data
;
695 struct gomp_task
*task
= ttask
->task
;
696 struct gomp_team
*team
= ttask
->team
;
698 gomp_mutex_lock (&team
->task_lock
);
699 if (ttask
->state
== GOMP_TARGET_TASK_READY_TO_RUN
)
701 ttask
->state
= GOMP_TARGET_TASK_FINISHED
;
702 gomp_mutex_unlock (&team
->task_lock
);
705 ttask
->state
= GOMP_TARGET_TASK_FINISHED
;
706 gomp_target_task_completion (team
, task
);
707 gomp_mutex_unlock (&team
->task_lock
);
710 static void gomp_task_run_post_handle_depend_hash (struct gomp_task
*);
712 /* Called for nowait target tasks. */
715 gomp_create_target_task (struct gomp_device_descr
*devicep
,
716 void (*fn
) (void *), size_t mapnum
, void **hostaddrs
,
717 size_t *sizes
, unsigned short *kinds
,
718 unsigned int flags
, void **depend
, void **args
,
719 enum gomp_target_task_state state
)
721 struct gomp_thread
*thr
= gomp_thread ();
722 struct gomp_team
*team
= thr
->ts
.team
;
724 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
725 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
727 if (gomp_team_barrier_cancelled (&team
->barrier
))
729 if (thr
->task
->taskgroup
)
731 if (thr
->task
->taskgroup
->cancelled
)
733 if (thr
->task
->taskgroup
->workshare
734 && thr
->task
->taskgroup
->prev
735 && thr
->task
->taskgroup
->prev
->cancelled
)
740 struct gomp_target_task
*ttask
;
741 struct gomp_task
*task
;
742 struct gomp_task
*parent
= thr
->task
;
743 struct gomp_taskgroup
*taskgroup
= parent
->taskgroup
;
745 size_t depend_size
= 0;
746 uintptr_t depend_cnt
= 0;
747 size_t tgt_align
= 0, tgt_size
= 0;
751 depend_cnt
= (uintptr_t) (depend
[0] ? depend
[0] : depend
[1]);
752 depend_size
= depend_cnt
* sizeof (struct gomp_task_depend_entry
);
756 /* GOMP_MAP_FIRSTPRIVATE need to be copied first, as they are
757 firstprivate on the target task. */
759 for (i
= 0; i
< mapnum
; i
++)
760 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
762 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
763 if (tgt_align
< align
)
765 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
766 tgt_size
+= sizes
[i
];
769 tgt_size
+= tgt_align
- 1;
774 task
= gomp_malloc (sizeof (*task
) + depend_size
776 + mapnum
* (sizeof (void *) + sizeof (size_t)
777 + sizeof (unsigned short))
779 gomp_init_task (task
, parent
, gomp_icv (false));
781 task
->kind
= GOMP_TASK_WAITING
;
782 task
->in_tied_task
= parent
->in_tied_task
;
783 task
->taskgroup
= taskgroup
;
784 ttask
= (struct gomp_target_task
*) &task
->depend
[depend_cnt
];
785 ttask
->devicep
= devicep
;
787 ttask
->mapnum
= mapnum
;
789 memcpy (ttask
->hostaddrs
, hostaddrs
, mapnum
* sizeof (void *));
790 ttask
->sizes
= (size_t *) &ttask
->hostaddrs
[mapnum
];
791 memcpy (ttask
->sizes
, sizes
, mapnum
* sizeof (size_t));
792 ttask
->kinds
= (unsigned short *) &ttask
->sizes
[mapnum
];
793 memcpy (ttask
->kinds
, kinds
, mapnum
* sizeof (unsigned short));
796 char *tgt
= (char *) &ttask
->kinds
[mapnum
];
798 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
800 tgt
+= tgt_align
- al
;
802 for (i
= 0; i
< mapnum
; i
++)
803 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
805 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
806 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
807 memcpy (tgt
+ tgt_size
, hostaddrs
[i
], sizes
[i
]);
808 ttask
->hostaddrs
[i
] = tgt
+ tgt_size
;
809 tgt_size
= tgt_size
+ sizes
[i
];
812 ttask
->flags
= flags
;
813 ttask
->state
= state
;
817 task
->fn_data
= ttask
;
818 task
->final_task
= 0;
819 gomp_mutex_lock (&team
->task_lock
);
820 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
821 if (__builtin_expect (gomp_cancel_var
, 0))
823 if (gomp_team_barrier_cancelled (&team
->barrier
))
826 gomp_mutex_unlock (&team
->task_lock
);
827 gomp_finish_task (task
);
833 if (taskgroup
->cancelled
)
835 if (taskgroup
->workshare
837 && taskgroup
->prev
->cancelled
)
843 gomp_task_handle_depend (task
, parent
, depend
);
844 if (task
->num_dependees
)
847 taskgroup
->num_children
++;
848 gomp_mutex_unlock (&team
->task_lock
);
852 if (state
== GOMP_TARGET_TASK_DATA
)
854 gomp_task_run_post_handle_depend_hash (task
);
855 gomp_mutex_unlock (&team
->task_lock
);
856 gomp_finish_task (task
);
861 taskgroup
->num_children
++;
862 /* For async offloading, if we don't need to wait for dependencies,
863 run the gomp_target_task_fn right away, essentially schedule the
864 mapping part of the task in the current thread. */
866 && (devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
868 priority_queue_insert (PQ_CHILDREN
, &parent
->children_queue
, task
, 0,
870 /*adjust_parent_depends_on=*/false,
871 task
->parent_depends_on
);
873 priority_queue_insert (PQ_TASKGROUP
, &taskgroup
->taskgroup_queue
,
874 task
, 0, PRIORITY_INSERT_END
,
875 /*adjust_parent_depends_on=*/false,
876 task
->parent_depends_on
);
877 task
->pnode
[PQ_TEAM
].next
= NULL
;
878 task
->pnode
[PQ_TEAM
].prev
= NULL
;
879 task
->kind
= GOMP_TASK_TIED
;
881 gomp_mutex_unlock (&team
->task_lock
);
884 gomp_target_task_fn (task
->fn_data
);
887 gomp_mutex_lock (&team
->task_lock
);
888 task
->kind
= GOMP_TASK_ASYNC_RUNNING
;
889 /* If GOMP_PLUGIN_target_task_completion has run already
890 in between gomp_target_task_fn and the mutex lock,
891 perform the requeuing here. */
892 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
893 gomp_target_task_completion (team
, task
);
895 ttask
->state
= GOMP_TARGET_TASK_RUNNING
;
896 gomp_mutex_unlock (&team
->task_lock
);
899 priority_queue_insert (PQ_CHILDREN
, &parent
->children_queue
, task
, 0,
900 PRIORITY_INSERT_BEGIN
,
901 /*adjust_parent_depends_on=*/false,
902 task
->parent_depends_on
);
904 priority_queue_insert (PQ_TASKGROUP
, &taskgroup
->taskgroup_queue
, task
, 0,
905 PRIORITY_INSERT_BEGIN
,
906 /*adjust_parent_depends_on=*/false,
907 task
->parent_depends_on
);
908 priority_queue_insert (PQ_TEAM
, &team
->task_queue
, task
, 0,
910 /*adjust_parent_depends_on=*/false,
911 task
->parent_depends_on
);
913 ++team
->task_queued_count
;
914 gomp_team_barrier_set_task_pending (&team
->barrier
);
915 do_wake
= team
->task_running_count
+ !parent
->in_tied_task
917 gomp_mutex_unlock (&team
->task_lock
);
919 gomp_team_barrier_wake (&team
->barrier
, 1);
923 /* Given a parent_depends_on task in LIST, move it to the front of its
924 priority so it is run as soon as possible.
926 Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.
928 We rearrange the queue such that all parent_depends_on tasks are
929 first, and last_parent_depends_on points to the last such task we
930 rearranged. For example, given the following tasks in a queue
931 where PD[123] are the parent_depends_on tasks:
936 C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4
938 We rearrange such that:
941 | +--- last_parent_depends_on
944 PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4. */
947 priority_list_upgrade_task (struct priority_list
*list
,
948 struct priority_node
*node
)
950 struct priority_node
*last_parent_depends_on
951 = list
->last_parent_depends_on
;
952 if (last_parent_depends_on
)
954 node
->prev
->next
= node
->next
;
955 node
->next
->prev
= node
->prev
;
956 node
->prev
= last_parent_depends_on
;
957 node
->next
= last_parent_depends_on
->next
;
958 node
->prev
->next
= node
;
959 node
->next
->prev
= node
;
961 else if (node
!= list
->tasks
)
963 node
->prev
->next
= node
->next
;
964 node
->next
->prev
= node
->prev
;
965 node
->prev
= list
->tasks
->prev
;
966 node
->next
= list
->tasks
;
968 node
->prev
->next
= node
;
969 node
->next
->prev
= node
;
971 list
->last_parent_depends_on
= node
;
974 /* Given a parent_depends_on TASK in its parent's children_queue, move
975 it to the front of its priority so it is run as soon as possible.
977 PARENT is passed as an optimization.
979 (This function could be defined in priority_queue.c, but we want it
980 inlined, and putting it in priority_queue.h is not an option, given
981 that gomp_task has not been properly defined at that point). */
984 priority_queue_upgrade_task (struct gomp_task
*task
,
985 struct gomp_task
*parent
)
987 struct priority_queue
*head
= &parent
->children_queue
;
988 struct priority_node
*node
= &task
->pnode
[PQ_CHILDREN
];
989 #if _LIBGOMP_CHECKING_
990 if (!task
->parent_depends_on
)
991 gomp_fatal ("priority_queue_upgrade_task: task must be a "
992 "parent_depends_on task");
993 if (!priority_queue_task_in_queue_p (PQ_CHILDREN
, head
, task
))
994 gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task
);
996 if (priority_queue_multi_p (head
))
998 struct priority_list
*list
999 = priority_queue_lookup_priority (head
, task
->priority
);
1000 priority_list_upgrade_task (list
, node
);
1003 priority_list_upgrade_task (&head
->l
, node
);
1006 /* Given a CHILD_TASK in LIST that is about to be executed, move it out of
1007 the way in LIST so that other tasks can be considered for
1008 execution. LIST contains tasks of type TYPE.
1010 Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
1014 priority_list_downgrade_task (enum priority_queue_type type
,
1015 struct priority_list
*list
,
1016 struct gomp_task
*child_task
)
1018 struct priority_node
*node
= task_to_priority_node (type
, child_task
);
1019 if (list
->tasks
== node
)
1020 list
->tasks
= node
->next
;
1021 else if (node
->next
!= list
->tasks
)
1023 /* The task in NODE is about to become TIED and TIED tasks
1024 cannot come before WAITING tasks. If we're about to
1025 leave the queue in such an indeterminate state, rewire
1026 things appropriately. However, a TIED task at the end is
1028 struct gomp_task
*next_task
= priority_node_to_task (type
, node
->next
);
1029 if (next_task
->kind
== GOMP_TASK_WAITING
)
1031 /* Remove from list. */
1032 node
->prev
->next
= node
->next
;
1033 node
->next
->prev
= node
->prev
;
1034 /* Rewire at the end. */
1035 node
->next
= list
->tasks
;
1036 node
->prev
= list
->tasks
->prev
;
1037 list
->tasks
->prev
->next
= node
;
1038 list
->tasks
->prev
= node
;
1042 /* If the current task is the last_parent_depends_on for its
1043 priority, adjust last_parent_depends_on appropriately. */
1044 if (__builtin_expect (child_task
->parent_depends_on
, 0)
1045 && list
->last_parent_depends_on
== node
)
1047 struct gomp_task
*prev_child
= priority_node_to_task (type
, node
->prev
);
1048 if (node
->prev
!= node
1049 && prev_child
->kind
== GOMP_TASK_WAITING
1050 && prev_child
->parent_depends_on
)
1051 list
->last_parent_depends_on
= node
->prev
;
1054 /* There are no more parent_depends_on entries waiting
1055 to run, clear the list. */
1056 list
->last_parent_depends_on
= NULL
;
1061 /* Given a TASK in HEAD that is about to be executed, move it out of
1062 the way so that other tasks can be considered for execution. HEAD
1063 contains tasks of type TYPE.
1065 Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
1068 (This function could be defined in priority_queue.c, but we want it
1069 inlined, and putting it in priority_queue.h is not an option, given
1070 that gomp_task has not been properly defined at that point). */
1073 priority_queue_downgrade_task (enum priority_queue_type type
,
1074 struct priority_queue
*head
,
1075 struct gomp_task
*task
)
1077 #if _LIBGOMP_CHECKING_
1078 if (!priority_queue_task_in_queue_p (type
, head
, task
))
1079 gomp_fatal ("Attempt to downgrade missing task %p", task
);
1081 if (priority_queue_multi_p (head
))
1083 struct priority_list
*list
1084 = priority_queue_lookup_priority (head
, task
->priority
);
1085 priority_list_downgrade_task (type
, list
, task
);
1088 priority_list_downgrade_task (type
, &head
->l
, task
);
1091 /* Setup CHILD_TASK to execute. This is done by setting the task to
1092 TIED, and updating all relevant queues so that CHILD_TASK is no
1093 longer chosen for scheduling. Also, remove CHILD_TASK from the
1094 overall team task queue entirely.
1096 Return TRUE if task or its containing taskgroup has been
1100 gomp_task_run_pre (struct gomp_task
*child_task
, struct gomp_task
*parent
,
1101 struct gomp_team
*team
)
1103 #if _LIBGOMP_CHECKING_
1104 if (child_task
->parent
)
1105 priority_queue_verify (PQ_CHILDREN
,
1106 &child_task
->parent
->children_queue
, true);
1107 if (child_task
->taskgroup
)
1108 priority_queue_verify (PQ_TASKGROUP
,
1109 &child_task
->taskgroup
->taskgroup_queue
, false);
1110 priority_queue_verify (PQ_TEAM
, &team
->task_queue
, false);
1113 /* Task is about to go tied, move it out of the way. */
1115 priority_queue_downgrade_task (PQ_CHILDREN
, &parent
->children_queue
,
1118 /* Task is about to go tied, move it out of the way. */
1119 struct gomp_taskgroup
*taskgroup
= child_task
->taskgroup
;
1121 priority_queue_downgrade_task (PQ_TASKGROUP
, &taskgroup
->taskgroup_queue
,
1124 priority_queue_remove (PQ_TEAM
, &team
->task_queue
, child_task
,
1126 child_task
->pnode
[PQ_TEAM
].next
= NULL
;
1127 child_task
->pnode
[PQ_TEAM
].prev
= NULL
;
1128 child_task
->kind
= GOMP_TASK_TIED
;
1130 if (--team
->task_queued_count
== 0)
1131 gomp_team_barrier_clear_task_pending (&team
->barrier
);
1132 if (__builtin_expect (gomp_cancel_var
, 0)
1133 && !child_task
->copy_ctors_done
)
1135 if (gomp_team_barrier_cancelled (&team
->barrier
))
1139 if (taskgroup
->cancelled
)
1141 if (taskgroup
->workshare
1143 && taskgroup
->prev
->cancelled
)
1151 gomp_task_run_post_handle_depend_hash (struct gomp_task
*child_task
)
1153 struct gomp_task
*parent
= child_task
->parent
;
1156 for (i
= 0; i
< child_task
->depend_count
; i
++)
1157 if (!child_task
->depend
[i
].redundant
)
1159 if (child_task
->depend
[i
].next
)
1160 child_task
->depend
[i
].next
->prev
= child_task
->depend
[i
].prev
;
1161 if (child_task
->depend
[i
].prev
)
1162 child_task
->depend
[i
].prev
->next
= child_task
->depend
[i
].next
;
1165 hash_entry_type
*slot
1166 = htab_find_slot (&parent
->depend_hash
, &child_task
->depend
[i
],
1168 if (*slot
!= &child_task
->depend
[i
])
1170 if (child_task
->depend
[i
].next
)
1171 *slot
= child_task
->depend
[i
].next
;
1173 htab_clear_slot (parent
->depend_hash
, slot
);
1178 /* After a CHILD_TASK has been run, adjust the dependency queue for
1179 each task that depends on CHILD_TASK, to record the fact that there
1180 is one less dependency to worry about. If a task that depended on
1181 CHILD_TASK now has no dependencies, place it in the various queues
1182 so it gets scheduled to run.
1184 TEAM is the team to which CHILD_TASK belongs to. */
1187 gomp_task_run_post_handle_dependers (struct gomp_task
*child_task
,
1188 struct gomp_team
*team
)
1190 struct gomp_task
*parent
= child_task
->parent
;
1191 size_t i
, count
= child_task
->dependers
->n_elem
, ret
= 0;
1192 for (i
= 0; i
< count
; i
++)
1194 struct gomp_task
*task
= child_task
->dependers
->elem
[i
];
1196 /* CHILD_TASK satisfies a dependency for TASK. Keep track of
1197 TASK's remaining dependencies. Once TASK has no other
1198 dependencies, put it into the various queues so it will get
1199 scheduled for execution. */
1200 if (--task
->num_dependees
!= 0)
1203 struct gomp_taskgroup
*taskgroup
= task
->taskgroup
;
1206 priority_queue_insert (PQ_CHILDREN
, &parent
->children_queue
,
1207 task
, task
->priority
,
1208 PRIORITY_INSERT_BEGIN
,
1209 /*adjust_parent_depends_on=*/true,
1210 task
->parent_depends_on
);
1211 if (parent
->taskwait
)
1213 if (parent
->taskwait
->in_taskwait
)
1215 /* One more task has had its dependencies met.
1216 Inform any waiters. */
1217 parent
->taskwait
->in_taskwait
= false;
1218 gomp_sem_post (&parent
->taskwait
->taskwait_sem
);
1220 else if (parent
->taskwait
->in_depend_wait
)
1222 /* One more task has had its dependencies met.
1223 Inform any waiters. */
1224 parent
->taskwait
->in_depend_wait
= false;
1225 gomp_sem_post (&parent
->taskwait
->taskwait_sem
);
1231 priority_queue_insert (PQ_TASKGROUP
, &taskgroup
->taskgroup_queue
,
1232 task
, task
->priority
,
1233 PRIORITY_INSERT_BEGIN
,
1234 /*adjust_parent_depends_on=*/false,
1235 task
->parent_depends_on
);
1236 if (taskgroup
->in_taskgroup_wait
)
1238 /* One more task has had its dependencies met.
1239 Inform any waiters. */
1240 taskgroup
->in_taskgroup_wait
= false;
1241 gomp_sem_post (&taskgroup
->taskgroup_sem
);
1244 priority_queue_insert (PQ_TEAM
, &team
->task_queue
,
1245 task
, task
->priority
,
1246 PRIORITY_INSERT_END
,
1247 /*adjust_parent_depends_on=*/false,
1248 task
->parent_depends_on
);
1250 ++team
->task_queued_count
;
1253 free (child_task
->dependers
);
1254 child_task
->dependers
= NULL
;
1256 gomp_team_barrier_set_task_pending (&team
->barrier
);
1260 static inline size_t
1261 gomp_task_run_post_handle_depend (struct gomp_task
*child_task
,
1262 struct gomp_team
*team
)
1264 if (child_task
->depend_count
== 0)
1267 /* If parent is gone already, the hash table is freed and nothing
1268 will use the hash table anymore, no need to remove anything from it. */
1269 if (child_task
->parent
!= NULL
)
1270 gomp_task_run_post_handle_depend_hash (child_task
);
1272 if (child_task
->dependers
== NULL
)
1275 return gomp_task_run_post_handle_dependers (child_task
, team
);
1278 /* Remove CHILD_TASK from its parent. */
1281 gomp_task_run_post_remove_parent (struct gomp_task
*child_task
)
1283 struct gomp_task
*parent
= child_task
->parent
;
1287 /* If this was the last task the parent was depending on,
1288 synchronize with gomp_task_maybe_wait_for_dependencies so it can
1289 clean up and return. */
1290 if (__builtin_expect (child_task
->parent_depends_on
, 0)
1291 && --parent
->taskwait
->n_depend
== 0
1292 && parent
->taskwait
->in_depend_wait
)
1294 parent
->taskwait
->in_depend_wait
= false;
1295 gomp_sem_post (&parent
->taskwait
->taskwait_sem
);
1298 if (priority_queue_remove (PQ_CHILDREN
, &parent
->children_queue
,
1299 child_task
, MEMMODEL_RELEASE
)
1300 && parent
->taskwait
&& parent
->taskwait
->in_taskwait
)
1302 parent
->taskwait
->in_taskwait
= false;
1303 gomp_sem_post (&parent
->taskwait
->taskwait_sem
);
1305 child_task
->pnode
[PQ_CHILDREN
].next
= NULL
;
1306 child_task
->pnode
[PQ_CHILDREN
].prev
= NULL
;
1309 /* Remove CHILD_TASK from its taskgroup. */
1312 gomp_task_run_post_remove_taskgroup (struct gomp_task
*child_task
)
1314 struct gomp_taskgroup
*taskgroup
= child_task
->taskgroup
;
1315 if (taskgroup
== NULL
)
1317 bool empty
= priority_queue_remove (PQ_TASKGROUP
,
1318 &taskgroup
->taskgroup_queue
,
1319 child_task
, MEMMODEL_RELAXED
);
1320 child_task
->pnode
[PQ_TASKGROUP
].next
= NULL
;
1321 child_task
->pnode
[PQ_TASKGROUP
].prev
= NULL
;
1322 if (taskgroup
->num_children
> 1)
1323 --taskgroup
->num_children
;
1326 /* We access taskgroup->num_children in GOMP_taskgroup_end
1327 outside of the task lock mutex region, so
1328 need a release barrier here to ensure memory
1329 written by child_task->fn above is flushed
1330 before the NULL is written. */
1331 __atomic_store_n (&taskgroup
->num_children
, 0, MEMMODEL_RELEASE
);
1333 if (empty
&& taskgroup
->in_taskgroup_wait
)
1335 taskgroup
->in_taskgroup_wait
= false;
1336 gomp_sem_post (&taskgroup
->taskgroup_sem
);
1341 gomp_barrier_handle_tasks (gomp_barrier_state_t state
)
1343 struct gomp_thread
*thr
= gomp_thread ();
1344 struct gomp_team
*team
= thr
->ts
.team
;
1345 struct gomp_task
*task
= thr
->task
;
1346 struct gomp_task
*child_task
= NULL
;
1347 struct gomp_task
*to_free
= NULL
;
1350 gomp_mutex_lock (&team
->task_lock
);
1351 if (gomp_barrier_last_thread (state
))
1353 if (team
->task_count
== 0)
1355 gomp_team_barrier_done (&team
->barrier
, state
);
1356 gomp_mutex_unlock (&team
->task_lock
);
1357 gomp_team_barrier_wake (&team
->barrier
, 0);
1360 gomp_team_barrier_set_waiting_for_tasks (&team
->barrier
);
1365 bool cancelled
= false;
1367 if (!priority_queue_empty_p (&team
->task_queue
, MEMMODEL_RELAXED
))
1371 = priority_queue_next_task (PQ_TEAM
, &team
->task_queue
,
1374 cancelled
= gomp_task_run_pre (child_task
, child_task
->parent
,
1376 if (__builtin_expect (cancelled
, 0))
1380 gomp_finish_task (to_free
);
1384 goto finish_cancelled
;
1386 team
->task_running_count
++;
1387 child_task
->in_tied_task
= true;
1389 else if (team
->task_count
== 0
1390 && gomp_team_barrier_waiting_for_tasks (&team
->barrier
))
1392 gomp_team_barrier_done (&team
->barrier
, state
);
1393 gomp_mutex_unlock (&team
->task_lock
);
1394 gomp_team_barrier_wake (&team
->barrier
, 0);
1397 gomp_finish_task (to_free
);
1402 gomp_mutex_unlock (&team
->task_lock
);
1405 gomp_team_barrier_wake (&team
->barrier
, do_wake
);
1410 gomp_finish_task (to_free
);
1416 thr
->task
= child_task
;
1417 if (__builtin_expect (child_task
->fn
== NULL
, 0))
1419 if (gomp_target_task_fn (child_task
->fn_data
))
1422 gomp_mutex_lock (&team
->task_lock
);
1423 child_task
->kind
= GOMP_TASK_ASYNC_RUNNING
;
1424 team
->task_running_count
--;
1425 struct gomp_target_task
*ttask
1426 = (struct gomp_target_task
*) child_task
->fn_data
;
1427 /* If GOMP_PLUGIN_target_task_completion has run already
1428 in between gomp_target_task_fn and the mutex lock,
1429 perform the requeuing here. */
1430 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
1431 gomp_target_task_completion (team
, child_task
);
1433 ttask
->state
= GOMP_TARGET_TASK_RUNNING
;
1439 child_task
->fn (child_task
->fn_data
);
1444 gomp_mutex_lock (&team
->task_lock
);
1447 if (child_task
->detach_team
)
1449 assert (child_task
->detach_team
== team
);
1450 child_task
->kind
= GOMP_TASK_DETACHED
;
1451 ++team
->task_detach_count
;
1452 --team
->task_running_count
;
1454 "thread %d: task with event %p finished without "
1455 "completion event fulfilled in team barrier\n",
1456 thr
->ts
.team_id
, child_task
);
1463 = gomp_task_run_post_handle_depend (child_task
, team
);
1464 gomp_task_run_post_remove_parent (child_task
);
1465 gomp_clear_parent (&child_task
->children_queue
);
1466 gomp_task_run_post_remove_taskgroup (child_task
);
1467 to_free
= child_task
;
1469 team
->task_running_count
--;
1473 do_wake
= team
->nthreads
- team
->task_running_count
;
1474 if (do_wake
> new_tasks
)
1475 do_wake
= new_tasks
;
1482 /* Called when encountering a taskwait directive.
1484 Wait for all children of the current task. */
1487 GOMP_taskwait (void)
1489 struct gomp_thread
*thr
= gomp_thread ();
1490 struct gomp_team
*team
= thr
->ts
.team
;
1491 struct gomp_task
*task
= thr
->task
;
1492 struct gomp_task
*child_task
= NULL
;
1493 struct gomp_task
*to_free
= NULL
;
1494 struct gomp_taskwait taskwait
;
1497 /* The acquire barrier on load of task->children here synchronizes
1498 with the write of a NULL in gomp_task_run_post_remove_parent. It is
1499 not necessary that we synchronize with other non-NULL writes at
1500 this point, but we must ensure that all writes to memory by a
1501 child thread task work function are seen before we exit from
1504 || priority_queue_empty_p (&task
->children_queue
, MEMMODEL_ACQUIRE
))
1507 memset (&taskwait
, 0, sizeof (taskwait
));
1508 bool child_q
= false;
1509 gomp_mutex_lock (&team
->task_lock
);
1512 bool cancelled
= false;
1513 if (priority_queue_empty_p (&task
->children_queue
, MEMMODEL_RELAXED
))
1515 bool destroy_taskwait
= task
->taskwait
!= NULL
;
1516 task
->taskwait
= NULL
;
1517 gomp_mutex_unlock (&team
->task_lock
);
1520 gomp_finish_task (to_free
);
1523 if (destroy_taskwait
)
1524 gomp_sem_destroy (&taskwait
.taskwait_sem
);
1527 struct gomp_task
*next_task
1528 = priority_queue_next_task (PQ_CHILDREN
, &task
->children_queue
,
1529 PQ_TEAM
, &team
->task_queue
, &child_q
);
1530 if (next_task
->kind
== GOMP_TASK_WAITING
)
1532 child_task
= next_task
;
1534 = gomp_task_run_pre (child_task
, task
, team
);
1535 if (__builtin_expect (cancelled
, 0))
1539 gomp_finish_task (to_free
);
1543 goto finish_cancelled
;
1548 /* All tasks we are waiting for are either running in other
1549 threads, are detached and waiting for the completion event to be
1550 fulfilled, or they are tasks that have not had their
1551 dependencies met (so they're not even in the queue). Wait
1553 if (task
->taskwait
== NULL
)
1555 taskwait
.in_depend_wait
= false;
1556 gomp_sem_init (&taskwait
.taskwait_sem
, 0);
1557 task
->taskwait
= &taskwait
;
1559 taskwait
.in_taskwait
= true;
1561 gomp_mutex_unlock (&team
->task_lock
);
1564 gomp_team_barrier_wake (&team
->barrier
, do_wake
);
1569 gomp_finish_task (to_free
);
1575 thr
->task
= child_task
;
1576 if (__builtin_expect (child_task
->fn
== NULL
, 0))
1578 if (gomp_target_task_fn (child_task
->fn_data
))
1581 gomp_mutex_lock (&team
->task_lock
);
1582 child_task
->kind
= GOMP_TASK_ASYNC_RUNNING
;
1583 struct gomp_target_task
*ttask
1584 = (struct gomp_target_task
*) child_task
->fn_data
;
1585 /* If GOMP_PLUGIN_target_task_completion has run already
1586 in between gomp_target_task_fn and the mutex lock,
1587 perform the requeuing here. */
1588 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
1589 gomp_target_task_completion (team
, child_task
);
1591 ttask
->state
= GOMP_TARGET_TASK_RUNNING
;
1597 child_task
->fn (child_task
->fn_data
);
1601 gomp_sem_wait (&taskwait
.taskwait_sem
);
1602 gomp_mutex_lock (&team
->task_lock
);
1605 if (child_task
->detach_team
)
1607 assert (child_task
->detach_team
== team
);
1608 child_task
->kind
= GOMP_TASK_DETACHED
;
1609 ++team
->task_detach_count
;
1611 "thread %d: task with event %p finished without "
1612 "completion event fulfilled in taskwait\n",
1613 thr
->ts
.team_id
, child_task
);
1620 = gomp_task_run_post_handle_depend (child_task
, team
);
1624 priority_queue_remove (PQ_CHILDREN
, &task
->children_queue
,
1625 child_task
, MEMMODEL_RELAXED
);
1626 child_task
->pnode
[PQ_CHILDREN
].next
= NULL
;
1627 child_task
->pnode
[PQ_CHILDREN
].prev
= NULL
;
1630 gomp_clear_parent (&child_task
->children_queue
);
1632 gomp_task_run_post_remove_taskgroup (child_task
);
1634 to_free
= child_task
;
1639 do_wake
= team
->nthreads
- team
->task_running_count
1640 - !task
->in_tied_task
;
1641 if (do_wake
> new_tasks
)
1642 do_wake
= new_tasks
;
1648 /* Called when encountering a taskwait directive with depend clause(s).
1649 Wait as if it was an mergeable included task construct with empty body. */
1652 GOMP_taskwait_depend (void **depend
)
1654 struct gomp_thread
*thr
= gomp_thread ();
1655 struct gomp_team
*team
= thr
->ts
.team
;
1657 /* If parallel or taskgroup has been cancelled, return early. */
1658 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
1660 if (gomp_team_barrier_cancelled (&team
->barrier
))
1662 if (thr
->task
->taskgroup
)
1664 if (thr
->task
->taskgroup
->cancelled
)
1666 if (thr
->task
->taskgroup
->workshare
1667 && thr
->task
->taskgroup
->prev
1668 && thr
->task
->taskgroup
->prev
->cancelled
)
1673 if (thr
->task
&& thr
->task
->depend_hash
)
1674 gomp_task_maybe_wait_for_dependencies (depend
);
1677 /* An undeferred task is about to run. Wait for all tasks that this
1678 undeferred task depends on.
1680 This is done by first putting all known ready dependencies
1681 (dependencies that have their own dependencies met) at the top of
1682 the scheduling queues. Then we iterate through these imminently
1683 ready tasks (and possibly other high priority tasks), and run them.
1684 If we run out of ready dependencies to execute, we either wait for
1685 the remaining dependencies to finish, or wait for them to get
1686 scheduled so we can run them.
1688 DEPEND is as in GOMP_task. */
1691 gomp_task_maybe_wait_for_dependencies (void **depend
)
1693 struct gomp_thread
*thr
= gomp_thread ();
1694 struct gomp_task
*task
= thr
->task
;
1695 struct gomp_team
*team
= thr
->ts
.team
;
1696 struct gomp_task_depend_entry elem
, *ent
= NULL
;
1697 struct gomp_taskwait taskwait
;
1698 size_t orig_ndepend
= (uintptr_t) depend
[0];
1699 size_t nout
= (uintptr_t) depend
[1];
1700 size_t ndepend
= orig_ndepend
;
1701 size_t normal
= ndepend
;
1704 size_t num_awaited
= 0;
1705 struct gomp_task
*child_task
= NULL
;
1706 struct gomp_task
*to_free
= NULL
;
1712 nout
= (uintptr_t) depend
[2] + (uintptr_t) depend
[3];
1713 normal
= nout
+ (uintptr_t) depend
[4];
1716 gomp_mutex_lock (&team
->task_lock
);
1717 for (i
= 0; i
< ndepend
; i
++)
1719 elem
.addr
= depend
[i
+ n
];
1720 elem
.is_in
= i
>= nout
;
1721 if (__builtin_expect (i
>= normal
, 0))
1723 void **d
= (void **) elem
.addr
;
1724 switch ((uintptr_t) d
[1])
1726 case GOMP_DEPEND_IN
:
1728 case GOMP_DEPEND_OUT
:
1729 case GOMP_DEPEND_INOUT
:
1730 case GOMP_DEPEND_MUTEXINOUTSET
:
1734 gomp_fatal ("unknown omp_depend_t dependence type %d",
1735 (int) (uintptr_t) d
[1]);
1739 ent
= htab_find (task
->depend_hash
, &elem
);
1740 for (; ent
; ent
= ent
->next
)
1741 if (elem
.is_in
&& ent
->is_in
)
1745 struct gomp_task
*tsk
= ent
->task
;
1746 if (!tsk
->parent_depends_on
)
1748 tsk
->parent_depends_on
= true;
1750 /* If dependency TSK itself has no dependencies and is
1751 ready to run, move it up front so that we run it as
1752 soon as possible. */
1753 if (tsk
->num_dependees
== 0 && tsk
->kind
== GOMP_TASK_WAITING
)
1754 priority_queue_upgrade_task (tsk
, task
);
1758 if (num_awaited
== 0)
1760 gomp_mutex_unlock (&team
->task_lock
);
1764 memset (&taskwait
, 0, sizeof (taskwait
));
1765 taskwait
.n_depend
= num_awaited
;
1766 gomp_sem_init (&taskwait
.taskwait_sem
, 0);
1767 task
->taskwait
= &taskwait
;
1771 bool cancelled
= false;
1772 if (taskwait
.n_depend
== 0)
1774 task
->taskwait
= NULL
;
1775 gomp_mutex_unlock (&team
->task_lock
);
1778 gomp_finish_task (to_free
);
1781 gomp_sem_destroy (&taskwait
.taskwait_sem
);
1785 /* Theoretically when we have multiple priorities, we should
1786 chose between the highest priority item in
1787 task->children_queue and team->task_queue here, so we should
1788 use priority_queue_next_task(). However, since we are
1789 running an undeferred task, perhaps that makes all tasks it
1790 depends on undeferred, thus a priority of INF? This would
1791 make it unnecessary to take anything into account here,
1792 but the dependencies.
1794 On the other hand, if we want to use priority_queue_next_task(),
1795 care should be taken to only use priority_queue_remove()
1796 below if the task was actually removed from the children
1799 struct gomp_task
*next_task
1800 = priority_queue_next_task (PQ_CHILDREN
, &task
->children_queue
,
1801 PQ_IGNORED
, NULL
, &ignored
);
1803 if (next_task
->kind
== GOMP_TASK_WAITING
)
1805 child_task
= next_task
;
1807 = gomp_task_run_pre (child_task
, task
, team
);
1808 if (__builtin_expect (cancelled
, 0))
1812 gomp_finish_task (to_free
);
1816 goto finish_cancelled
;
1820 /* All tasks we are waiting for are either running in other
1821 threads, or they are tasks that have not had their
1822 dependencies met (so they're not even in the queue). Wait
1824 taskwait
.in_depend_wait
= true;
1825 gomp_mutex_unlock (&team
->task_lock
);
1828 gomp_team_barrier_wake (&team
->barrier
, do_wake
);
1833 gomp_finish_task (to_free
);
1839 thr
->task
= child_task
;
1840 if (__builtin_expect (child_task
->fn
== NULL
, 0))
1842 if (gomp_target_task_fn (child_task
->fn_data
))
1845 gomp_mutex_lock (&team
->task_lock
);
1846 child_task
->kind
= GOMP_TASK_ASYNC_RUNNING
;
1847 struct gomp_target_task
*ttask
1848 = (struct gomp_target_task
*) child_task
->fn_data
;
1849 /* If GOMP_PLUGIN_target_task_completion has run already
1850 in between gomp_target_task_fn and the mutex lock,
1851 perform the requeuing here. */
1852 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
1853 gomp_target_task_completion (team
, child_task
);
1855 ttask
->state
= GOMP_TARGET_TASK_RUNNING
;
1861 child_task
->fn (child_task
->fn_data
);
1865 gomp_sem_wait (&taskwait
.taskwait_sem
);
1866 gomp_mutex_lock (&team
->task_lock
);
1871 = gomp_task_run_post_handle_depend (child_task
, team
);
1872 if (child_task
->parent_depends_on
)
1873 --taskwait
.n_depend
;
1875 priority_queue_remove (PQ_CHILDREN
, &task
->children_queue
,
1876 child_task
, MEMMODEL_RELAXED
);
1877 child_task
->pnode
[PQ_CHILDREN
].next
= NULL
;
1878 child_task
->pnode
[PQ_CHILDREN
].prev
= NULL
;
1880 gomp_clear_parent (&child_task
->children_queue
);
1881 gomp_task_run_post_remove_taskgroup (child_task
);
1882 to_free
= child_task
;
1887 do_wake
= team
->nthreads
- team
->task_running_count
1888 - !task
->in_tied_task
;
1889 if (do_wake
> new_tasks
)
1890 do_wake
= new_tasks
;
/* Called when encountering a taskyield directive.  */

void
GOMP_taskyield (void)
{
  /* Nothing at the moment.  */
}
1904 static inline struct gomp_taskgroup
*
1905 gomp_taskgroup_init (struct gomp_taskgroup
*prev
)
1907 struct gomp_taskgroup
*taskgroup
1908 = gomp_malloc (sizeof (struct gomp_taskgroup
));
1909 taskgroup
->prev
= prev
;
1910 priority_queue_init (&taskgroup
->taskgroup_queue
);
1911 taskgroup
->reductions
= prev
? prev
->reductions
: NULL
;
1912 taskgroup
->in_taskgroup_wait
= false;
1913 taskgroup
->cancelled
= false;
1914 taskgroup
->workshare
= false;
1915 taskgroup
->num_children
= 0;
1916 gomp_sem_init (&taskgroup
->taskgroup_sem
, 0);
1921 GOMP_taskgroup_start (void)
1923 struct gomp_thread
*thr
= gomp_thread ();
1924 struct gomp_team
*team
= thr
->ts
.team
;
1925 struct gomp_task
*task
= thr
->task
;
1927 /* If team is NULL, all tasks are executed as
1928 GOMP_TASK_UNDEFERRED tasks and thus all children tasks of
1929 taskgroup and their descendant tasks will be finished
1930 by the time GOMP_taskgroup_end is called. */
1933 task
->taskgroup
= gomp_taskgroup_init (task
->taskgroup
);
1937 GOMP_taskgroup_end (void)
1939 struct gomp_thread
*thr
= gomp_thread ();
1940 struct gomp_team
*team
= thr
->ts
.team
;
1941 struct gomp_task
*task
= thr
->task
;
1942 struct gomp_taskgroup
*taskgroup
;
1943 struct gomp_task
*child_task
= NULL
;
1944 struct gomp_task
*to_free
= NULL
;
1949 taskgroup
= task
->taskgroup
;
1950 if (__builtin_expect (taskgroup
== NULL
, 0)
1951 && thr
->ts
.level
== 0)
1953 /* This can happen if GOMP_taskgroup_start is called when
1954 thr->ts.team == NULL, but inside of the taskgroup there
1955 is #pragma omp target nowait that creates an implicit
1956 team with a single thread. In this case, we want to wait
1957 for all outstanding tasks in this team. */
1958 gomp_team_barrier_wait (&team
->barrier
);
1962 /* The acquire barrier on load of taskgroup->num_children here
1963 synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
1964 It is not necessary that we synchronize with other non-0 writes at
1965 this point, but we must ensure that all writes to memory by a
1966 child thread task work function are seen before we exit from
1967 GOMP_taskgroup_end. */
1968 if (__atomic_load_n (&taskgroup
->num_children
, MEMMODEL_ACQUIRE
) == 0)
1972 gomp_mutex_lock (&team
->task_lock
);
1975 bool cancelled
= false;
1976 if (priority_queue_empty_p (&taskgroup
->taskgroup_queue
,
1979 if (taskgroup
->num_children
)
1981 if (priority_queue_empty_p (&task
->children_queue
,
1985 = priority_queue_next_task (PQ_CHILDREN
, &task
->children_queue
,
1986 PQ_TEAM
, &team
->task_queue
,
1991 gomp_mutex_unlock (&team
->task_lock
);
1994 gomp_finish_task (to_free
);
2002 = priority_queue_next_task (PQ_TASKGROUP
, &taskgroup
->taskgroup_queue
,
2003 PQ_TEAM
, &team
->task_queue
, &unused
);
2004 if (child_task
->kind
== GOMP_TASK_WAITING
)
2007 = gomp_task_run_pre (child_task
, child_task
->parent
, team
);
2008 if (__builtin_expect (cancelled
, 0))
2012 gomp_finish_task (to_free
);
2016 goto finish_cancelled
;
2023 /* All tasks we are waiting for are either running in other
2024 threads, or they are tasks that have not had their
2025 dependencies met (so they're not even in the queue). Wait
2027 taskgroup
->in_taskgroup_wait
= true;
2029 gomp_mutex_unlock (&team
->task_lock
);
2032 gomp_team_barrier_wake (&team
->barrier
, do_wake
);
2037 gomp_finish_task (to_free
);
2043 thr
->task
= child_task
;
2044 if (__builtin_expect (child_task
->fn
== NULL
, 0))
2046 if (gomp_target_task_fn (child_task
->fn_data
))
2049 gomp_mutex_lock (&team
->task_lock
);
2050 child_task
->kind
= GOMP_TASK_ASYNC_RUNNING
;
2051 struct gomp_target_task
*ttask
2052 = (struct gomp_target_task
*) child_task
->fn_data
;
2053 /* If GOMP_PLUGIN_target_task_completion has run already
2054 in between gomp_target_task_fn and the mutex lock,
2055 perform the requeuing here. */
2056 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
2057 gomp_target_task_completion (team
, child_task
);
2059 ttask
->state
= GOMP_TARGET_TASK_RUNNING
;
2065 child_task
->fn (child_task
->fn_data
);
2069 gomp_sem_wait (&taskgroup
->taskgroup_sem
);
2070 gomp_mutex_lock (&team
->task_lock
);
2073 if (child_task
->detach_team
)
2075 assert (child_task
->detach_team
== team
);
2076 child_task
->kind
= GOMP_TASK_DETACHED
;
2077 ++team
->task_detach_count
;
2079 "thread %d: task with event %p finished without "
2080 "completion event fulfilled in taskgroup\n",
2081 thr
->ts
.team_id
, child_task
);
2088 = gomp_task_run_post_handle_depend (child_task
, team
);
2089 gomp_task_run_post_remove_parent (child_task
);
2090 gomp_clear_parent (&child_task
->children_queue
);
2091 gomp_task_run_post_remove_taskgroup (child_task
);
2092 to_free
= child_task
;
2097 do_wake
= team
->nthreads
- team
->task_running_count
2098 - !task
->in_tied_task
;
2099 if (do_wake
> new_tasks
)
2100 do_wake
= new_tasks
;
2106 task
->taskgroup
= taskgroup
->prev
;
2107 gomp_sem_destroy (&taskgroup
->taskgroup_sem
);
2111 static inline __attribute__((always_inline
)) void
2112 gomp_reduction_register (uintptr_t *data
, uintptr_t *old
, uintptr_t *orig
,
2115 size_t total_cnt
= 0;
2116 uintptr_t *d
= data
;
2117 struct htab
*old_htab
= NULL
, *new_htab
;
2120 if (__builtin_expect (orig
!= NULL
, 0))
2122 /* For worksharing task reductions, memory has been allocated
2123 already by some other thread that encountered the construct
2127 orig
= (uintptr_t *) orig
[4];
2131 size_t sz
= d
[1] * nthreads
;
2132 /* Should use omp_alloc if d[3] is not -1. */
2133 void *ptr
= gomp_aligned_alloc (d
[2], sz
);
2134 memset (ptr
, '\0', sz
);
2135 d
[2] = (uintptr_t) ptr
;
2142 d
[4] = (uintptr_t) old
;
2146 d
= (uintptr_t *) d
[4];
2151 old_htab
= (struct htab
*) old
[5];
2152 total_cnt
+= htab_elements (old_htab
);
2154 new_htab
= htab_create (total_cnt
);
2157 /* Copy old hash table, like in htab_expand. */
2158 hash_entry_type
*p
, *olimit
;
2159 new_htab
->n_elements
= htab_elements (old_htab
);
2160 olimit
= old_htab
->entries
+ old_htab
->size
;
2161 p
= old_htab
->entries
;
2164 hash_entry_type x
= *p
;
2165 if (x
!= HTAB_EMPTY_ENTRY
&& x
!= HTAB_DELETED_ENTRY
)
2166 *find_empty_slot_for_expand (new_htab
, htab_hash (x
)) = x
;
2175 for (j
= 0; j
< d
[0]; ++j
)
2177 uintptr_t *p
= d
+ 7 + j
* 3;
2178 p
[2] = (uintptr_t) d
;
2179 /* Ugly hack, hash_entry_type is defined for the task dependencies,
2180 which hash on the first element which is a pointer. We need
2181 to hash also on the first sizeof (uintptr_t) bytes which contain
2182 a pointer. Hide the cast from the compiler. */
2184 __asm ("" : "=g" (n
) : "0" (p
));
2185 *htab_find_slot (&new_htab
, n
, INSERT
) = n
;
2187 if (d
[4] == (uintptr_t) old
)
2190 d
= (uintptr_t *) d
[4];
2193 d
[5] = (uintptr_t) new_htab
;
2197 gomp_create_artificial_team (void)
2199 struct gomp_thread
*thr
= gomp_thread ();
2200 struct gomp_task_icv
*icv
;
2201 struct gomp_team
*team
= gomp_new_team (1);
2202 struct gomp_task
*task
= thr
->task
;
2203 icv
= task
? &task
->icv
: &gomp_global_icv
;
2204 team
->prev_ts
= thr
->ts
;
2205 thr
->ts
.team
= team
;
2206 thr
->ts
.team_id
= 0;
2207 thr
->ts
.work_share
= &team
->work_shares
[0];
2208 thr
->ts
.last_work_share
= NULL
;
2209 #ifdef HAVE_SYNC_BUILTINS
2210 thr
->ts
.single_count
= 0;
2212 thr
->ts
.static_trip
= 0;
2213 thr
->task
= &team
->implicit_task
[0];
2214 gomp_init_task (thr
->task
, NULL
, icv
);
2220 thr
->task
= &team
->implicit_task
[0];
2222 #ifdef LIBGOMP_USE_PTHREADS
2224 pthread_setspecific (gomp_thread_destructor
, thr
);
2228 /* The format of data is:
2231 data[2] alignment (on output array pointer)
2232 data[3] allocator (-1 if malloc allocator)
2233 data[4] next pointer
2234 data[5] used internally (htab pointer)
2235 data[6] used internally (end of array)
2239 ent[2] used internally (pointer to data[0])
2240 The entries are sorted by increasing offset, so that a binary
2241 search can be performed. Normally, data[8] is 0, exception is
2242 for worksharing construct task reductions in cancellable parallel,
2243 where at offset 0 there should be space for a pointer and an integer
2244 which are used internally. */
2247 GOMP_taskgroup_reduction_register (uintptr_t *data
)
2249 struct gomp_thread
*thr
= gomp_thread ();
2250 struct gomp_team
*team
= thr
->ts
.team
;
2251 struct gomp_task
*task
;
2253 if (__builtin_expect (team
== NULL
, 0))
2255 /* The task reduction code needs a team and task, so for
2256 orphaned taskgroups just create the implicit team. */
2257 gomp_create_artificial_team ();
2258 ialias_call (GOMP_taskgroup_start
) ();
2259 team
= thr
->ts
.team
;
2261 nthreads
= team
->nthreads
;
2263 gomp_reduction_register (data
, task
->taskgroup
->reductions
, NULL
, nthreads
);
2264 task
->taskgroup
->reductions
= data
;
2268 GOMP_taskgroup_reduction_unregister (uintptr_t *data
)
2270 uintptr_t *d
= data
;
2271 htab_free ((struct htab
*) data
[5]);
2274 gomp_aligned_free ((void *) d
[2]);
2275 d
= (uintptr_t *) d
[4];
2279 ialias (GOMP_taskgroup_reduction_unregister
)
2281 /* For i = 0 to cnt-1, remap ptrs[i] which is either address of the
2282 original list item or address of previously remapped original list
2283 item to address of the private copy, store that to ptrs[i].
2284 For i < cntorig, additionally set ptrs[cnt+i] to the address of
2285 the original list item. */
2288 GOMP_task_reduction_remap (size_t cnt
, size_t cntorig
, void **ptrs
)
2290 struct gomp_thread
*thr
= gomp_thread ();
2291 struct gomp_task
*task
= thr
->task
;
2292 unsigned id
= thr
->ts
.team_id
;
2293 uintptr_t *data
= task
->taskgroup
->reductions
;
2295 struct htab
*reduction_htab
= (struct htab
*) data
[5];
2297 for (i
= 0; i
< cnt
; ++i
)
2299 hash_entry_type ent
, n
;
2300 __asm ("" : "=g" (ent
) : "0" (ptrs
+ i
));
2301 n
= htab_find (reduction_htab
, ent
);
2305 __asm ("" : "=g" (p
) : "0" (n
));
2306 /* At this point, p[0] should be equal to (uintptr_t) ptrs[i],
2307 p[1] is the offset within the allocated chunk for each
2308 thread, p[2] is the array registered with
2309 GOMP_taskgroup_reduction_register, d[2] is the base of the
2310 allocated memory and d[1] is the size of the allocated chunk
2312 d
= (uintptr_t *) p
[2];
2313 ptrs
[i
] = (void *) (d
[2] + id
* d
[1] + p
[1]);
2314 if (__builtin_expect (i
< cntorig
, 0))
2315 ptrs
[cnt
+ i
] = (void *) p
[0];
2321 if ((uintptr_t) ptrs
[i
] >= d
[2] && (uintptr_t) ptrs
[i
] < d
[6])
2323 d
= (uintptr_t *) d
[4];
2326 gomp_fatal ("couldn't find matching task_reduction or reduction with "
2327 "task modifier for %p", ptrs
[i
]);
2328 uintptr_t off
= ((uintptr_t) ptrs
[i
] - d
[2]) % d
[1];
2329 ptrs
[i
] = (void *) (d
[2] + id
* d
[1] + off
);
2330 if (__builtin_expect (i
< cntorig
, 0))
2332 size_t lo
= 0, hi
= d
[0] - 1;
2335 size_t m
= (lo
+ hi
) / 2;
2336 if (d
[7 + 3 * m
+ 1] < off
)
2338 else if (d
[7 + 3 * m
+ 1] == off
)
2340 ptrs
[cnt
+ i
] = (void *) d
[7 + 3 * m
];
2347 gomp_fatal ("couldn't find matching task_reduction or reduction "
2348 "with task modifier for %p", ptrs
[i
]);
2353 struct gomp_taskgroup
*
2354 gomp_parallel_reduction_register (uintptr_t *data
, unsigned nthreads
)
2356 struct gomp_taskgroup
*taskgroup
= gomp_taskgroup_init (NULL
);
2357 gomp_reduction_register (data
, NULL
, NULL
, nthreads
);
2358 taskgroup
->reductions
= data
;
2363 gomp_workshare_task_reduction_register (uintptr_t *data
, uintptr_t *orig
)
2365 struct gomp_thread
*thr
= gomp_thread ();
2366 struct gomp_team
*team
= thr
->ts
.team
;
2367 struct gomp_task
*task
= thr
->task
;
2368 unsigned nthreads
= team
->nthreads
;
2369 gomp_reduction_register (data
, task
->taskgroup
->reductions
, orig
, nthreads
);
2370 task
->taskgroup
->reductions
= data
;
2374 gomp_workshare_taskgroup_start (void)
2376 struct gomp_thread
*thr
= gomp_thread ();
2377 struct gomp_team
*team
= thr
->ts
.team
;
2378 struct gomp_task
*task
;
2382 gomp_create_artificial_team ();
2383 team
= thr
->ts
.team
;
2386 task
->taskgroup
= gomp_taskgroup_init (task
->taskgroup
);
2387 task
->taskgroup
->workshare
= true;
2391 GOMP_workshare_task_reduction_unregister (bool cancelled
)
2393 struct gomp_thread
*thr
= gomp_thread ();
2394 struct gomp_task
*task
= thr
->task
;
2395 struct gomp_team
*team
= thr
->ts
.team
;
2396 uintptr_t *data
= task
->taskgroup
->reductions
;
2397 ialias_call (GOMP_taskgroup_end
) ();
2398 if (thr
->ts
.team_id
== 0)
2399 ialias_call (GOMP_taskgroup_reduction_unregister
) (data
);
2401 htab_free ((struct htab
*) data
[5]);
2404 gomp_team_barrier_wait (&team
->barrier
);
2410 struct gomp_thread
*thr
= gomp_thread ();
2411 return thr
->task
&& thr
->task
->final_task
;
2414 ialias (omp_in_final
)
2417 omp_fulfill_event (omp_event_handle_t event
)
2419 struct gomp_task
*task
= (struct gomp_task
*) event
;
2420 if (!task
->deferred_p
)
2422 if (gomp_sem_getcount (task
->completion_sem
) > 0)
2423 gomp_fatal ("omp_fulfill_event: %p event already fulfilled!\n", task
);
2425 gomp_debug (0, "omp_fulfill_event: %p event for undeferred task\n",
2427 gomp_sem_post (task
->completion_sem
);
2431 struct gomp_team
*team
= __atomic_load_n (&task
->detach_team
,
2434 gomp_fatal ("omp_fulfill_event: %p event is invalid or has already "
2435 "been fulfilled!\n", task
);
2437 gomp_mutex_lock (&team
->task_lock
);
2438 if (task
->kind
!= GOMP_TASK_DETACHED
)
2440 /* The task has not finished running yet. */
2442 "omp_fulfill_event: %p event fulfilled for unfinished "
2444 __atomic_store_n (&task
->detach_team
, NULL
, MEMMODEL_RELAXED
);
2445 gomp_mutex_unlock (&team
->task_lock
);
2449 gomp_debug (0, "omp_fulfill_event: %p event fulfilled for finished task\n",
2451 size_t new_tasks
= gomp_task_run_post_handle_depend (task
, team
);
2452 gomp_task_run_post_remove_parent (task
);
2453 gomp_clear_parent (&task
->children_queue
);
2454 gomp_task_run_post_remove_taskgroup (task
);
2456 team
->task_detach_count
--;
2459 bool shackled_thread_p
= team
== gomp_thread ()->ts
.team
;
2462 /* Wake up threads to run new tasks. */
2463 gomp_team_barrier_set_task_pending (&team
->barrier
);
2464 do_wake
= team
->nthreads
- team
->task_running_count
;
2465 if (do_wake
> new_tasks
)
2466 do_wake
= new_tasks
;
2469 if (!shackled_thread_p
2471 && team
->task_detach_count
== 0
2472 && gomp_team_barrier_waiting_for_tasks (&team
->barrier
))
2473 /* Ensure that at least one thread is woken up to signal that the
2474 barrier can finish. */
2477 /* If we are running in an unshackled thread, the team might vanish before
2478 gomp_team_barrier_wake is run if we release the lock first, so keep the
2479 lock for the call in that case. */
2480 if (shackled_thread_p
)
2481 gomp_mutex_unlock (&team
->task_lock
);
2483 gomp_team_barrier_wake (&team
->barrier
, do_wake
);
2484 if (!shackled_thread_p
)
2485 gomp_mutex_unlock (&team
->task_lock
);
2487 gomp_finish_task (task
);
2491 ialias (omp_fulfill_event
)