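/* Page banner carried over from the repository web view this text was
   copied from:
     libstdc++: Optimize std::remove_pointer compilation performance
     [official-gcc.git] / libgomp / loop.c
     blob d6450fea039d73a67c0aeb8ce6349375a4c932a8
   The first banner line is unrelated to the contents of this file.  */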
/* Copyright (C) 2005-2023 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the LOOP (FOR/DO) construct.  */

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "libgomp.h"

34 ialias (GOMP_loop_runtime_next)
35 ialias_redirect (GOMP_taskgroup_reduction_register)
37 /* Initialize the given work share construct from the given arguments. */
39 static inline void
40 gomp_loop_init (struct gomp_work_share *ws, long start, long end, long incr,
41 enum gomp_schedule_type sched, long chunk_size)
43 ws->sched = sched;
44 ws->chunk_size = chunk_size;
45 /* Canonicalize loops that have zero iterations to ->next == ->end. */
46 ws->end = ((incr > 0 && start > end) || (incr < 0 && start < end))
47 ? start : end;
48 ws->incr = incr;
49 ws->next = start;
50 if (sched == GFS_DYNAMIC)
52 ws->chunk_size *= incr;
54 #ifdef HAVE_SYNC_BUILTINS
56 /* For dynamic scheduling prepare things to make each iteration
57 faster. */
58 struct gomp_thread *thr = gomp_thread ();
59 struct gomp_team *team = thr->ts.team;
60 long nthreads = team ? team->nthreads : 1;
62 if (__builtin_expect (incr > 0, 1))
64 /* Cheap overflow protection. */
65 if (__builtin_expect ((nthreads | ws->chunk_size)
66 >= 1UL << (sizeof (long)
67 * __CHAR_BIT__ / 2 - 1), 0))
68 ws->mode = 0;
69 else
70 ws->mode = ws->end < (LONG_MAX
71 - (nthreads + 1) * ws->chunk_size);
73 /* Cheap overflow protection. */
74 else if (__builtin_expect ((nthreads | -ws->chunk_size)
75 >= 1UL << (sizeof (long)
76 * __CHAR_BIT__ / 2 - 1), 0))
77 ws->mode = 0;
78 else
79 ws->mode = ws->end > (nthreads + 1) * -ws->chunk_size - LONG_MAX;
81 #endif
85 /* The *_start routines are called when first encountering a loop construct
86 that is not bound directly to a parallel construct. The first thread
87 that arrives will create the work-share construct; subsequent threads
88 will see the construct exists and allocate work from it.
90 START, END, INCR are the bounds of the loop; due to the restrictions of
91 OpenMP, these values must be the same in every thread. This is not
92 verified (nor is it entirely verifiable, since START is not necessarily
93 retained intact in the work-share data structure). CHUNK_SIZE is the
94 scheduling parameter; again this must be identical in all threads.
96 Returns true if there's any work for this thread to perform. If so,
97 *ISTART and *IEND are filled with the bounds of the iteration block
98 allocated to this thread. Returns false if all work was assigned to
99 other threads prior to this thread's arrival. */
101 static bool
102 gomp_loop_static_start (long start, long end, long incr, long chunk_size,
103 long *istart, long *iend)
105 struct gomp_thread *thr = gomp_thread ();
107 thr->ts.static_trip = 0;
108 if (gomp_work_share_start (0))
110 gomp_loop_init (thr->ts.work_share, start, end, incr,
111 GFS_STATIC, chunk_size);
112 gomp_work_share_init_done ();
115 return !gomp_iter_static_next (istart, iend);
118 /* The current dynamic implementation is always monotonic. The
119 entrypoints without nonmonotonic in them have to be always monotonic,
120 but the nonmonotonic ones could be changed to use work-stealing for
121 improved scalability. */
123 static bool
124 gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size,
125 long *istart, long *iend)
127 struct gomp_thread *thr = gomp_thread ();
128 bool ret;
130 if (gomp_work_share_start (0))
132 gomp_loop_init (thr->ts.work_share, start, end, incr,
133 GFS_DYNAMIC, chunk_size);
134 gomp_work_share_init_done ();
137 #ifdef HAVE_SYNC_BUILTINS
138 ret = gomp_iter_dynamic_next (istart, iend);
139 #else
140 gomp_mutex_lock (&thr->ts.work_share->lock);
141 ret = gomp_iter_dynamic_next_locked (istart, iend);
142 gomp_mutex_unlock (&thr->ts.work_share->lock);
143 #endif
145 return ret;
148 /* Similarly as for dynamic, though the question is how can the chunk sizes
149 be decreased without a central locking or atomics. */
151 static bool
152 gomp_loop_guided_start (long start, long end, long incr, long chunk_size,
153 long *istart, long *iend)
155 struct gomp_thread *thr = gomp_thread ();
156 bool ret;
158 if (gomp_work_share_start (0))
160 gomp_loop_init (thr->ts.work_share, start, end, incr,
161 GFS_GUIDED, chunk_size);
162 gomp_work_share_init_done ();
165 #ifdef HAVE_SYNC_BUILTINS
166 ret = gomp_iter_guided_next (istart, iend);
167 #else
168 gomp_mutex_lock (&thr->ts.work_share->lock);
169 ret = gomp_iter_guided_next_locked (istart, iend);
170 gomp_mutex_unlock (&thr->ts.work_share->lock);
171 #endif
173 return ret;
176 bool
177 GOMP_loop_runtime_start (long start, long end, long incr,
178 long *istart, long *iend)
180 struct gomp_task_icv *icv = gomp_icv (false);
181 switch (icv->run_sched_var & ~GFS_MONOTONIC)
183 case GFS_STATIC:
184 return gomp_loop_static_start (start, end, incr,
185 icv->run_sched_chunk_size,
186 istart, iend);
187 case GFS_DYNAMIC:
188 return gomp_loop_dynamic_start (start, end, incr,
189 icv->run_sched_chunk_size,
190 istart, iend);
191 case GFS_GUIDED:
192 return gomp_loop_guided_start (start, end, incr,
193 icv->run_sched_chunk_size,
194 istart, iend);
195 case GFS_AUTO:
196 /* For now map to schedule(static), later on we could play with feedback
197 driven choice. */
198 return gomp_loop_static_start (start, end, incr, 0, istart, iend);
199 default:
200 abort ();
204 static long
205 gomp_adjust_sched (long sched, long *chunk_size)
207 sched &= ~GFS_MONOTONIC;
208 switch (sched)
210 case GFS_STATIC:
211 case GFS_DYNAMIC:
212 case GFS_GUIDED:
213 return sched;
214 /* GFS_RUNTIME is used for runtime schedule without monotonic
215 or nonmonotonic modifiers on the clause.
216 GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
217 modifier. */
218 case GFS_RUNTIME:
219 /* GFS_AUTO is used for runtime schedule with nonmonotonic
220 modifier. */
221 case GFS_AUTO:
223 struct gomp_task_icv *icv = gomp_icv (false);
224 sched = icv->run_sched_var & ~GFS_MONOTONIC;
225 switch (sched)
227 case GFS_STATIC:
228 case GFS_DYNAMIC:
229 case GFS_GUIDED:
230 *chunk_size = icv->run_sched_chunk_size;
231 break;
232 case GFS_AUTO:
233 sched = GFS_STATIC;
234 *chunk_size = 0;
235 break;
236 default:
237 abort ();
239 return sched;
241 default:
242 abort ();
246 bool
247 GOMP_loop_start (long start, long end, long incr, long sched,
248 long chunk_size, long *istart, long *iend,
249 uintptr_t *reductions, void **mem)
251 struct gomp_thread *thr = gomp_thread ();
253 thr->ts.static_trip = 0;
254 if (reductions)
255 gomp_workshare_taskgroup_start ();
256 if (gomp_work_share_start (0))
258 sched = gomp_adjust_sched (sched, &chunk_size);
259 gomp_loop_init (thr->ts.work_share, start, end, incr,
260 sched, chunk_size);
261 if (reductions)
263 GOMP_taskgroup_reduction_register (reductions);
264 thr->task->taskgroup->workshare = true;
265 thr->ts.work_share->task_reductions = reductions;
267 if (mem)
269 uintptr_t size = (uintptr_t) *mem;
270 #define INLINE_ORDERED_TEAM_IDS_OFF \
271 ((offsetof (struct gomp_work_share, inline_ordered_team_ids) \
272 + __alignof__ (long long) - 1) & ~(__alignof__ (long long) - 1))
273 if (sizeof (struct gomp_work_share)
274 <= INLINE_ORDERED_TEAM_IDS_OFF
275 || __alignof__ (struct gomp_work_share) < __alignof__ (long long)
276 || size > (sizeof (struct gomp_work_share)
277 - INLINE_ORDERED_TEAM_IDS_OFF))
278 *mem
279 = (void *) (thr->ts.work_share->ordered_team_ids
280 = gomp_malloc_cleared (size));
281 else
282 *mem = memset (((char *) thr->ts.work_share)
283 + INLINE_ORDERED_TEAM_IDS_OFF, '\0', size);
285 gomp_work_share_init_done ();
287 else
289 if (reductions)
291 uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
292 gomp_workshare_task_reduction_register (reductions,
293 first_reductions);
295 if (mem)
297 if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
298 & (__alignof__ (long long) - 1)) == 0)
299 *mem = (void *) thr->ts.work_share->ordered_team_ids;
300 else
302 uintptr_t p = (uintptr_t) thr->ts.work_share->ordered_team_ids;
303 p += __alignof__ (long long) - 1;
304 p &= ~(__alignof__ (long long) - 1);
305 *mem = (void *) p;
310 if (!istart)
311 return true;
312 return ialias_call (GOMP_loop_runtime_next) (istart, iend);
315 /* The *_ordered_*_start routines are similar. The only difference is that
316 this work-share construct is initialized to expect an ORDERED section. */
318 static bool
319 gomp_loop_ordered_static_start (long start, long end, long incr,
320 long chunk_size, long *istart, long *iend)
322 struct gomp_thread *thr = gomp_thread ();
324 thr->ts.static_trip = 0;
325 if (gomp_work_share_start (1))
327 gomp_loop_init (thr->ts.work_share, start, end, incr,
328 GFS_STATIC, chunk_size);
329 gomp_ordered_static_init ();
330 gomp_work_share_init_done ();
333 return !gomp_iter_static_next (istart, iend);
336 static bool
337 gomp_loop_ordered_dynamic_start (long start, long end, long incr,
338 long chunk_size, long *istart, long *iend)
340 struct gomp_thread *thr = gomp_thread ();
341 bool ret;
343 if (gomp_work_share_start (1))
345 gomp_loop_init (thr->ts.work_share, start, end, incr,
346 GFS_DYNAMIC, chunk_size);
347 gomp_mutex_lock (&thr->ts.work_share->lock);
348 gomp_work_share_init_done ();
350 else
351 gomp_mutex_lock (&thr->ts.work_share->lock);
353 ret = gomp_iter_dynamic_next_locked (istart, iend);
354 if (ret)
355 gomp_ordered_first ();
356 gomp_mutex_unlock (&thr->ts.work_share->lock);
358 return ret;
361 static bool
362 gomp_loop_ordered_guided_start (long start, long end, long incr,
363 long chunk_size, long *istart, long *iend)
365 struct gomp_thread *thr = gomp_thread ();
366 bool ret;
368 if (gomp_work_share_start (1))
370 gomp_loop_init (thr->ts.work_share, start, end, incr,
371 GFS_GUIDED, chunk_size);
372 gomp_mutex_lock (&thr->ts.work_share->lock);
373 gomp_work_share_init_done ();
375 else
376 gomp_mutex_lock (&thr->ts.work_share->lock);
378 ret = gomp_iter_guided_next_locked (istart, iend);
379 if (ret)
380 gomp_ordered_first ();
381 gomp_mutex_unlock (&thr->ts.work_share->lock);
383 return ret;
386 bool
387 GOMP_loop_ordered_runtime_start (long start, long end, long incr,
388 long *istart, long *iend)
390 struct gomp_task_icv *icv = gomp_icv (false);
391 switch (icv->run_sched_var & ~GFS_MONOTONIC)
393 case GFS_STATIC:
394 return gomp_loop_ordered_static_start (start, end, incr,
395 icv->run_sched_chunk_size,
396 istart, iend);
397 case GFS_DYNAMIC:
398 return gomp_loop_ordered_dynamic_start (start, end, incr,
399 icv->run_sched_chunk_size,
400 istart, iend);
401 case GFS_GUIDED:
402 return gomp_loop_ordered_guided_start (start, end, incr,
403 icv->run_sched_chunk_size,
404 istart, iend);
405 case GFS_AUTO:
406 /* For now map to schedule(static), later on we could play with feedback
407 driven choice. */
408 return gomp_loop_ordered_static_start (start, end, incr,
409 0, istart, iend);
410 default:
411 abort ();
415 bool
416 GOMP_loop_ordered_start (long start, long end, long incr, long sched,
417 long chunk_size, long *istart, long *iend,
418 uintptr_t *reductions, void **mem)
420 struct gomp_thread *thr = gomp_thread ();
421 size_t ordered = 1;
422 bool ret;
424 thr->ts.static_trip = 0;
425 if (reductions)
426 gomp_workshare_taskgroup_start ();
427 if (mem)
428 ordered += (uintptr_t) *mem;
429 if (gomp_work_share_start (ordered))
431 sched = gomp_adjust_sched (sched, &chunk_size);
432 gomp_loop_init (thr->ts.work_share, start, end, incr,
433 sched, chunk_size);
434 if (reductions)
436 GOMP_taskgroup_reduction_register (reductions);
437 thr->task->taskgroup->workshare = true;
438 thr->ts.work_share->task_reductions = reductions;
440 if (sched == GFS_STATIC)
441 gomp_ordered_static_init ();
442 else
443 gomp_mutex_lock (&thr->ts.work_share->lock);
444 gomp_work_share_init_done ();
446 else
448 if (reductions)
450 uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
451 gomp_workshare_task_reduction_register (reductions,
452 first_reductions);
454 sched = thr->ts.work_share->sched;
455 if (sched != GFS_STATIC)
456 gomp_mutex_lock (&thr->ts.work_share->lock);
459 if (mem)
461 uintptr_t p
462 = (uintptr_t) (thr->ts.work_share->ordered_team_ids
463 + (thr->ts.team ? thr->ts.team->nthreads : 1));
464 p += __alignof__ (long long) - 1;
465 p &= ~(__alignof__ (long long) - 1);
466 *mem = (void *) p;
469 switch (sched)
471 case GFS_STATIC:
472 case GFS_AUTO:
473 return !gomp_iter_static_next (istart, iend);
474 case GFS_DYNAMIC:
475 ret = gomp_iter_dynamic_next_locked (istart, iend);
476 break;
477 case GFS_GUIDED:
478 ret = gomp_iter_guided_next_locked (istart, iend);
479 break;
480 default:
481 abort ();
484 if (ret)
485 gomp_ordered_first ();
486 gomp_mutex_unlock (&thr->ts.work_share->lock);
487 return ret;
490 /* The *_doacross_*_start routines are similar. The only difference is that
491 this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
492 section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
493 and other COUNTS array elements tell the library number of iterations
494 in the ordered inner loops. */
496 static bool
497 gomp_loop_doacross_static_start (unsigned ncounts, long *counts,
498 long chunk_size, long *istart, long *iend)
500 struct gomp_thread *thr = gomp_thread ();
502 thr->ts.static_trip = 0;
503 if (gomp_work_share_start (0))
505 gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
506 GFS_STATIC, chunk_size);
507 gomp_doacross_init (ncounts, counts, chunk_size, 0);
508 gomp_work_share_init_done ();
511 return !gomp_iter_static_next (istart, iend);
514 static bool
515 gomp_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
516 long chunk_size, long *istart, long *iend)
518 struct gomp_thread *thr = gomp_thread ();
519 bool ret;
521 if (gomp_work_share_start (0))
523 gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
524 GFS_DYNAMIC, chunk_size);
525 gomp_doacross_init (ncounts, counts, chunk_size, 0);
526 gomp_work_share_init_done ();
529 #ifdef HAVE_SYNC_BUILTINS
530 ret = gomp_iter_dynamic_next (istart, iend);
531 #else
532 gomp_mutex_lock (&thr->ts.work_share->lock);
533 ret = gomp_iter_dynamic_next_locked (istart, iend);
534 gomp_mutex_unlock (&thr->ts.work_share->lock);
535 #endif
537 return ret;
540 static bool
541 gomp_loop_doacross_guided_start (unsigned ncounts, long *counts,
542 long chunk_size, long *istart, long *iend)
544 struct gomp_thread *thr = gomp_thread ();
545 bool ret;
547 if (gomp_work_share_start (0))
549 gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
550 GFS_GUIDED, chunk_size);
551 gomp_doacross_init (ncounts, counts, chunk_size, 0);
552 gomp_work_share_init_done ();
555 #ifdef HAVE_SYNC_BUILTINS
556 ret = gomp_iter_guided_next (istart, iend);
557 #else
558 gomp_mutex_lock (&thr->ts.work_share->lock);
559 ret = gomp_iter_guided_next_locked (istart, iend);
560 gomp_mutex_unlock (&thr->ts.work_share->lock);
561 #endif
563 return ret;
566 bool
567 GOMP_loop_doacross_runtime_start (unsigned ncounts, long *counts,
568 long *istart, long *iend)
570 struct gomp_task_icv *icv = gomp_icv (false);
571 switch (icv->run_sched_var & ~GFS_MONOTONIC)
573 case GFS_STATIC:
574 return gomp_loop_doacross_static_start (ncounts, counts,
575 icv->run_sched_chunk_size,
576 istart, iend);
577 case GFS_DYNAMIC:
578 return gomp_loop_doacross_dynamic_start (ncounts, counts,
579 icv->run_sched_chunk_size,
580 istart, iend);
581 case GFS_GUIDED:
582 return gomp_loop_doacross_guided_start (ncounts, counts,
583 icv->run_sched_chunk_size,
584 istart, iend);
585 case GFS_AUTO:
586 /* For now map to schedule(static), later on we could play with feedback
587 driven choice. */
588 return gomp_loop_doacross_static_start (ncounts, counts,
589 0, istart, iend);
590 default:
591 abort ();
595 bool
596 GOMP_loop_doacross_start (unsigned ncounts, long *counts, long sched,
597 long chunk_size, long *istart, long *iend,
598 uintptr_t *reductions, void **mem)
600 struct gomp_thread *thr = gomp_thread ();
602 thr->ts.static_trip = 0;
603 if (reductions)
604 gomp_workshare_taskgroup_start ();
605 if (gomp_work_share_start (0))
607 size_t extra = 0;
608 if (mem)
609 extra = (uintptr_t) *mem;
610 sched = gomp_adjust_sched (sched, &chunk_size);
611 gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
612 sched, chunk_size);
613 gomp_doacross_init (ncounts, counts, chunk_size, extra);
614 if (reductions)
616 GOMP_taskgroup_reduction_register (reductions);
617 thr->task->taskgroup->workshare = true;
618 thr->ts.work_share->task_reductions = reductions;
620 gomp_work_share_init_done ();
622 else
624 if (reductions)
626 uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
627 gomp_workshare_task_reduction_register (reductions,
628 first_reductions);
630 sched = thr->ts.work_share->sched;
633 if (mem)
634 *mem = thr->ts.work_share->doacross->extra;
636 return ialias_call (GOMP_loop_runtime_next) (istart, iend);
/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before the parallel.  In which case, this
   may be the first iteration for the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_static_next (long *istart, long *iend)
{
  return !gomp_iter_static_next (istart, iend);
}

655 static bool
656 gomp_loop_dynamic_next (long *istart, long *iend)
658 bool ret;
660 #ifdef HAVE_SYNC_BUILTINS
661 ret = gomp_iter_dynamic_next (istart, iend);
662 #else
663 struct gomp_thread *thr = gomp_thread ();
664 gomp_mutex_lock (&thr->ts.work_share->lock);
665 ret = gomp_iter_dynamic_next_locked (istart, iend);
666 gomp_mutex_unlock (&thr->ts.work_share->lock);
667 #endif
669 return ret;
672 static bool
673 gomp_loop_guided_next (long *istart, long *iend)
675 bool ret;
677 #ifdef HAVE_SYNC_BUILTINS
678 ret = gomp_iter_guided_next (istart, iend);
679 #else
680 struct gomp_thread *thr = gomp_thread ();
681 gomp_mutex_lock (&thr->ts.work_share->lock);
682 ret = gomp_iter_guided_next_locked (istart, iend);
683 gomp_mutex_unlock (&thr->ts.work_share->lock);
684 #endif
686 return ret;
689 bool
690 GOMP_loop_runtime_next (long *istart, long *iend)
692 struct gomp_thread *thr = gomp_thread ();
694 switch (thr->ts.work_share->sched)
696 case GFS_STATIC:
697 case GFS_AUTO:
698 return gomp_loop_static_next (istart, iend);
699 case GFS_DYNAMIC:
700 return gomp_loop_dynamic_next (istart, iend);
701 case GFS_GUIDED:
702 return gomp_loop_guided_next (istart, iend);
703 default:
704 abort ();
708 /* The *_ordered_*_next routines are called when the thread completes
709 processing of the iteration block currently assigned to it.
711 Returns true if there is work remaining to be performed; *ISTART and
712 *IEND are filled with a new iteration block. Returns false if all work
713 has been assigned. */
715 static bool
716 gomp_loop_ordered_static_next (long *istart, long *iend)
718 struct gomp_thread *thr = gomp_thread ();
719 int test;
721 gomp_ordered_sync ();
722 gomp_mutex_lock (&thr->ts.work_share->lock);
723 test = gomp_iter_static_next (istart, iend);
724 if (test >= 0)
725 gomp_ordered_static_next ();
726 gomp_mutex_unlock (&thr->ts.work_share->lock);
728 return test == 0;
731 static bool
732 gomp_loop_ordered_dynamic_next (long *istart, long *iend)
734 struct gomp_thread *thr = gomp_thread ();
735 bool ret;
737 gomp_ordered_sync ();
738 gomp_mutex_lock (&thr->ts.work_share->lock);
739 ret = gomp_iter_dynamic_next_locked (istart, iend);
740 if (ret)
741 gomp_ordered_next ();
742 else
743 gomp_ordered_last ();
744 gomp_mutex_unlock (&thr->ts.work_share->lock);
746 return ret;
749 static bool
750 gomp_loop_ordered_guided_next (long *istart, long *iend)
752 struct gomp_thread *thr = gomp_thread ();
753 bool ret;
755 gomp_ordered_sync ();
756 gomp_mutex_lock (&thr->ts.work_share->lock);
757 ret = gomp_iter_guided_next_locked (istart, iend);
758 if (ret)
759 gomp_ordered_next ();
760 else
761 gomp_ordered_last ();
762 gomp_mutex_unlock (&thr->ts.work_share->lock);
764 return ret;
767 bool
768 GOMP_loop_ordered_runtime_next (long *istart, long *iend)
770 struct gomp_thread *thr = gomp_thread ();
772 switch (thr->ts.work_share->sched)
774 case GFS_STATIC:
775 case GFS_AUTO:
776 return gomp_loop_ordered_static_next (istart, iend);
777 case GFS_DYNAMIC:
778 return gomp_loop_ordered_dynamic_next (istart, iend);
779 case GFS_GUIDED:
780 return gomp_loop_ordered_guided_next (istart, iend);
781 default:
782 abort ();
786 /* The GOMP_parallel_loop_* routines pre-initialize a work-share construct
787 to avoid one synchronization once we get into the loop. */
789 static void
790 gomp_parallel_loop_start (void (*fn) (void *), void *data,
791 unsigned num_threads, long start, long end,
792 long incr, enum gomp_schedule_type sched,
793 long chunk_size, unsigned int flags)
795 struct gomp_team *team;
797 num_threads = gomp_resolve_num_threads (num_threads, 0);
798 team = gomp_new_team (num_threads);
799 gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
800 gomp_team_start (fn, data, num_threads, flags, team, NULL);
803 void
804 GOMP_parallel_loop_static_start (void (*fn) (void *), void *data,
805 unsigned num_threads, long start, long end,
806 long incr, long chunk_size)
808 gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
809 GFS_STATIC, chunk_size, 0);
812 void
813 GOMP_parallel_loop_dynamic_start (void (*fn) (void *), void *data,
814 unsigned num_threads, long start, long end,
815 long incr, long chunk_size)
817 gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
818 GFS_DYNAMIC, chunk_size, 0);
821 void
822 GOMP_parallel_loop_guided_start (void (*fn) (void *), void *data,
823 unsigned num_threads, long start, long end,
824 long incr, long chunk_size)
826 gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
827 GFS_GUIDED, chunk_size, 0);
830 void
831 GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data,
832 unsigned num_threads, long start, long end,
833 long incr)
835 struct gomp_task_icv *icv = gomp_icv (false);
836 gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
837 icv->run_sched_var & ~GFS_MONOTONIC,
838 icv->run_sched_chunk_size, 0);
841 ialias_redirect (GOMP_parallel_end)
843 void
844 GOMP_parallel_loop_static (void (*fn) (void *), void *data,
845 unsigned num_threads, long start, long end,
846 long incr, long chunk_size, unsigned flags)
848 gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
849 GFS_STATIC, chunk_size, flags);
850 fn (data);
851 GOMP_parallel_end ();
854 void
855 GOMP_parallel_loop_dynamic (void (*fn) (void *), void *data,
856 unsigned num_threads, long start, long end,
857 long incr, long chunk_size, unsigned flags)
859 gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
860 GFS_DYNAMIC, chunk_size, flags);
861 fn (data);
862 GOMP_parallel_end ();
865 void
866 GOMP_parallel_loop_guided (void (*fn) (void *), void *data,
867 unsigned num_threads, long start, long end,
868 long incr, long chunk_size, unsigned flags)
870 gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
871 GFS_GUIDED, chunk_size, flags);
872 fn (data);
873 GOMP_parallel_end ();
876 void
877 GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
878 unsigned num_threads, long start, long end,
879 long incr, unsigned flags)
881 struct gomp_task_icv *icv = gomp_icv (false);
882 gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
883 icv->run_sched_var & ~GFS_MONOTONIC,
884 icv->run_sched_chunk_size, flags);
885 fn (data);
886 GOMP_parallel_end ();
889 #ifdef HAVE_ATTRIBUTE_ALIAS
890 extern __typeof(GOMP_parallel_loop_dynamic) GOMP_parallel_loop_nonmonotonic_dynamic
891 __attribute__((alias ("GOMP_parallel_loop_dynamic")));
892 extern __typeof(GOMP_parallel_loop_guided) GOMP_parallel_loop_nonmonotonic_guided
893 __attribute__((alias ("GOMP_parallel_loop_guided")));
894 extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_nonmonotonic_runtime
895 __attribute__((alias ("GOMP_parallel_loop_runtime")));
896 extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_maybe_nonmonotonic_runtime
897 __attribute__((alias ("GOMP_parallel_loop_runtime")));
898 #else
899 void
900 GOMP_parallel_loop_nonmonotonic_dynamic (void (*fn) (void *), void *data,
901 unsigned num_threads, long start,
902 long end, long incr, long chunk_size,
903 unsigned flags)
905 gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
906 GFS_DYNAMIC, chunk_size, flags);
907 fn (data);
908 GOMP_parallel_end ();
911 void
912 GOMP_parallel_loop_nonmonotonic_guided (void (*fn) (void *), void *data,
913 unsigned num_threads, long start,
914 long end, long incr, long chunk_size,
915 unsigned flags)
917 gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
918 GFS_GUIDED, chunk_size, flags);
919 fn (data);
920 GOMP_parallel_end ();
923 void
924 GOMP_parallel_loop_nonmonotonic_runtime (void (*fn) (void *), void *data,
925 unsigned num_threads, long start,
926 long end, long incr, unsigned flags)
928 struct gomp_task_icv *icv = gomp_icv (false);
929 gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
930 icv->run_sched_var & ~GFS_MONOTONIC,
931 icv->run_sched_chunk_size, flags);
932 fn (data);
933 GOMP_parallel_end ();
936 void
937 GOMP_parallel_loop_maybe_nonmonotonic_runtime (void (*fn) (void *), void *data,
938 unsigned num_threads, long start,
939 long end, long incr,
940 unsigned flags)
942 struct gomp_task_icv *icv = gomp_icv (false);
943 gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
944 icv->run_sched_var & ~GFS_MONOTONIC,
945 icv->run_sched_chunk_size, flags);
946 fn (data);
947 GOMP_parallel_end ();
949 #endif
/* The GOMP_loop_end* routines are called after the thread is told that
   all loop iterations are complete.  The first two versions synchronize
   all threads; the nowait version does not.  */

void
GOMP_loop_end (void)
{
  gomp_work_share_end ();
}

/* End the loop construct and return whatever gomp_work_share_end_cancel
   reports.  */

bool
GOMP_loop_end_cancel (void)
{
  return gomp_work_share_end_cancel ();
}

/* End the loop construct via gomp_work_share_end_nowait.  */

void
GOMP_loop_end_nowait (void)
{
  gomp_work_share_end_nowait ();
}


/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.

   Each alias in the first arm has a matching wrapper in the second arm;
   keep the two lists in sync.  */

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(gomp_loop_static_start) GOMP_loop_static_start
        __attribute__((alias ("gomp_loop_static_start")));
extern __typeof(gomp_loop_dynamic_start) GOMP_loop_dynamic_start
        __attribute__((alias ("gomp_loop_dynamic_start")));
extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start
        __attribute__((alias ("gomp_loop_guided_start")));
extern __typeof(gomp_loop_dynamic_start) GOMP_loop_nonmonotonic_dynamic_start
        __attribute__((alias ("gomp_loop_dynamic_start")));
extern __typeof(gomp_loop_guided_start) GOMP_loop_nonmonotonic_guided_start
        __attribute__((alias ("gomp_loop_guided_start")));
extern __typeof(GOMP_loop_runtime_start) GOMP_loop_nonmonotonic_runtime_start
        __attribute__((alias ("GOMP_loop_runtime_start")));
extern __typeof(GOMP_loop_runtime_start) GOMP_loop_maybe_nonmonotonic_runtime_start
        __attribute__((alias ("GOMP_loop_runtime_start")));

extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start
        __attribute__((alias ("gomp_loop_ordered_static_start")));
extern __typeof(gomp_loop_ordered_dynamic_start) GOMP_loop_ordered_dynamic_start
        __attribute__((alias ("gomp_loop_ordered_dynamic_start")));
extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start
        __attribute__((alias ("gomp_loop_ordered_guided_start")));

extern __typeof(gomp_loop_doacross_static_start) GOMP_loop_doacross_static_start
        __attribute__((alias ("gomp_loop_doacross_static_start")));
extern __typeof(gomp_loop_doacross_dynamic_start) GOMP_loop_doacross_dynamic_start
        __attribute__((alias ("gomp_loop_doacross_dynamic_start")));
extern __typeof(gomp_loop_doacross_guided_start) GOMP_loop_doacross_guided_start
        __attribute__((alias ("gomp_loop_doacross_guided_start")));

extern __typeof(gomp_loop_static_next) GOMP_loop_static_next
        __attribute__((alias ("gomp_loop_static_next")));
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next
        __attribute__((alias ("gomp_loop_dynamic_next")));
extern __typeof(gomp_loop_guided_next) GOMP_loop_guided_next
        __attribute__((alias ("gomp_loop_guided_next")));
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_nonmonotonic_dynamic_next
        __attribute__((alias ("gomp_loop_dynamic_next")));
extern __typeof(gomp_loop_guided_next) GOMP_loop_nonmonotonic_guided_next
        __attribute__((alias ("gomp_loop_guided_next")));
extern __typeof(GOMP_loop_runtime_next) GOMP_loop_nonmonotonic_runtime_next
        __attribute__((alias ("GOMP_loop_runtime_next")));
extern __typeof(GOMP_loop_runtime_next) GOMP_loop_maybe_nonmonotonic_runtime_next
        __attribute__((alias ("GOMP_loop_runtime_next")));

extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next
        __attribute__((alias ("gomp_loop_ordered_static_next")));
extern __typeof(gomp_loop_ordered_dynamic_next) GOMP_loop_ordered_dynamic_next
        __attribute__((alias ("gomp_loop_ordered_dynamic_next")));
extern __typeof(gomp_loop_ordered_guided_next) GOMP_loop_ordered_guided_next
        __attribute__((alias ("gomp_loop_ordered_guided_next")));
#else
bool
GOMP_loop_static_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  return gomp_loop_static_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_dynamic_start (long start, long end, long incr, long chunk_size,
                         long *istart, long *iend)
{
  return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_guided_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_nonmonotonic_dynamic_start (long start, long end, long incr,
                                      long chunk_size, long *istart,
                                      long *iend)
{
  return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_nonmonotonic_guided_start (long start, long end, long incr,
                                     long chunk_size, long *istart, long *iend)
{
  return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_nonmonotonic_runtime_start (long start, long end, long incr,
                                      long *istart, long *iend)
{
  return GOMP_loop_runtime_start (start, end, incr, istart, iend);
}

bool
GOMP_loop_maybe_nonmonotonic_runtime_start (long start, long end, long incr,
                                            long *istart, long *iend)
{
  return GOMP_loop_runtime_start (start, end, incr, istart, iend);
}

bool
GOMP_loop_ordered_static_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_static_start (start, end, incr, chunk_size,
                                         istart, iend);
}

bool
GOMP_loop_ordered_dynamic_start (long start, long end, long incr,
                                 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_start (start, end, incr, chunk_size,
                                          istart, iend);
}

bool
GOMP_loop_ordered_guided_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_guided_start (start, end, incr, chunk_size,
                                         istart, iend);
}

bool
GOMP_loop_doacross_static_start (unsigned ncounts, long *counts,
                                 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_doacross_static_start (ncounts, counts, chunk_size,
                                          istart, iend);
}

bool
GOMP_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
                                  long chunk_size, long *istart, long *iend)
{
  return gomp_loop_doacross_dynamic_start (ncounts, counts, chunk_size,
                                           istart, iend);
}

bool
GOMP_loop_doacross_guided_start (unsigned ncounts, long *counts,
                                 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_doacross_guided_start (ncounts, counts, chunk_size,
                                          istart, iend);
}

bool
GOMP_loop_static_next (long *istart, long *iend)
{
  return gomp_loop_static_next (istart, iend);
}

bool
GOMP_loop_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_dynamic_next (istart, iend);
}

bool
GOMP_loop_guided_next (long *istart, long *iend)
{
  return gomp_loop_guided_next (istart, iend);
}

bool
GOMP_loop_nonmonotonic_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_dynamic_next (istart, iend);
}

bool
GOMP_loop_nonmonotonic_guided_next (long *istart, long *iend)
{
  return gomp_loop_guided_next (istart, iend);
}

bool
GOMP_loop_nonmonotonic_runtime_next (long *istart, long *iend)
{
  return GOMP_loop_runtime_next (istart, iend);
}

bool
GOMP_loop_maybe_nonmonotonic_runtime_next (long *istart, long *iend)
{
  return GOMP_loop_runtime_next (istart, iend);
}

bool
GOMP_loop_ordered_static_next (long *istart, long *iend)
{
  return gomp_loop_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ordered_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ordered_guided_next (long *istart, long *iend)
{
  return gomp_loop_ordered_guided_next (istart, iend);
}
#endif