/* Copyright (C) 2005-2013 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file handles the LOOP (FOR/DO) construct.  */
/* Initialize the given work share construct from the given arguments.  */
/* Fill in the work share WS for a loop running from START to END in
   steps of INCR, using schedule SCHED and chunk size CHUNK_SIZE.

   The visible statements store CHUNK_SIZE in WS->chunk_size and assign
   WS->end from an expression that tests for a zero-iteration loop
   (INCR > 0 with START > END, or INCR < 0 with START < END).  For
   GFS_DYNAMIC the stored chunk size is multiplied by INCR and WS->mode
   is computed from WS->end, the thread count and the scaled chunk size.

   NOTE(review): this listing looks incomplete.  The return type, the
   braces, the tail of the WS->end expression and the statements taken
   when an overflow guard fires are not visible, and the comment that
   begins "For dynamic scheduling" is never terminated.  Confirm against
   the complete file.  */
35 gomp_loop_init (struct gomp_work_share
*ws
, long start
, long end
, long incr
,
36 enum gomp_schedule_type sched
, long chunk_size
)
39 ws
->chunk_size
= chunk_size
;
40 /* Canonicalize loops that have zero iterations to ->next == ->end. */
41 ws
->end
= ((incr
> 0 && start
> end
) || (incr
< 0 && start
< end
))
/* Dynamic schedules keep the chunk size pre-multiplied by the step.  */
45 if (sched
== GFS_DYNAMIC
)
47 ws
->chunk_size
*= incr
;
49 #ifdef HAVE_SYNC_BUILTINS
51 /* For dynamic scheduling prepare things to make each iteration
53 struct gomp_thread
*thr
= gomp_thread ();
54 struct gomp_team
*team
= thr
->ts
.team
;
55 long nthreads
= team
? team
->nthreads
: 1;
57 if (__builtin_expect (incr
> 0, 1))
59 /* Cheap overflow protection. */
60 if (__builtin_expect ((nthreads
| ws
->chunk_size
)
61 >= 1UL << (sizeof (long)
62 * __CHAR_BIT__
/ 2 - 1), 0))
/* Positive step: mode records whether WS->end lies below LONG_MAX minus
   (nthreads + 1) chunks.  Presumably this tells the iterator that
   stepping past the end cannot overflow -- TODO confirm.  */
65 ws
->mode
= ws
->end
< (LONG_MAX
66 - (nthreads
+ 1) * ws
->chunk_size
);
68 /* Cheap overflow protection. */
69 else if (__builtin_expect ((nthreads
| -ws
->chunk_size
)
70 >= 1UL << (sizeof (long)
71 * __CHAR_BIT__
/ 2 - 1), 0))
/* Negative step: the mirrored test, using the negated chunk size and a
   lower bound of (nthreads + 1) chunks minus LONG_MAX.  */
74 ws
->mode
= ws
->end
> (nthreads
+ 1) * -ws
->chunk_size
- LONG_MAX
;
/* The *_start routines are called when first encountering a loop construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see the construct exists and allocate work from it.

   START, END, INCR are the bounds of the loop; due to the restrictions of
   OpenMP, these values must be the same in every thread.  This is not
   verified (nor is it entirely verifiable, since START is not necessarily
   retained intact in the work-share data structure).  CHUNK_SIZE is the
   scheduling parameter; again this must be identical in all threads.

   Returns true if there's any work for this thread to perform.  If so,
   *ISTART and *IEND are filled with the bounds of the iteration block
   allocated to this thread.  Returns false if all work was assigned to
   other threads prior to this thread's arrival.  */
/* Start a loop work share with a static schedule.

   Resets the calling thread's ts.static_trip field to 0.  When
   gomp_work_share_start (false) reports that this thread must set up the
   work share, it is initialized through gomp_loop_init with GFS_STATIC
   and CHUNK_SIZE, followed by gomp_work_share_init_done.  The result is
   the logical negation of gomp_iter_static_next (ISTART, IEND).

   NOTE(review): the return type and braces are not visible in this
   listing; confirm against the complete file.  */
97 gomp_loop_static_start (long start
, long end
, long incr
, long chunk_size
,
98 long *istart
, long *iend
)
100 struct gomp_thread
*thr
= gomp_thread ();
102 thr
->ts
.static_trip
= 0;
103 if (gomp_work_share_start (false))
105 gomp_loop_init (thr
->ts
.work_share
, start
, end
, incr
,
106 GFS_STATIC
, chunk_size
);
107 gomp_work_share_init_done ();
110 return !gomp_iter_static_next (istart
, iend
);
/* Start a loop work share with a dynamic schedule.

   When gomp_work_share_start (false) reports that this thread must set
   up the work share, it is initialized through gomp_loop_init with
   GFS_DYNAMIC and CHUNK_SIZE, followed by gomp_work_share_init_done.
   The first iteration block is then requested in one of two ways: under
   HAVE_SYNC_BUILTINS by gomp_iter_dynamic_next, or by
   gomp_iter_dynamic_next_locked while holding the work share's lock.

   NOTE(review): the return type, braces, declaration of RET, the
   directives separating the two paths and the final return are not
   visible in this listing.  Presumably the locked path is the fallback
   used when HAVE_SYNC_BUILTINS is undefined -- confirm.  */
114 gomp_loop_dynamic_start (long start
, long end
, long incr
, long chunk_size
,
115 long *istart
, long *iend
)
117 struct gomp_thread
*thr
= gomp_thread ();
120 if (gomp_work_share_start (false))
122 gomp_loop_init (thr
->ts
.work_share
, start
, end
, incr
,
123 GFS_DYNAMIC
, chunk_size
);
124 gomp_work_share_init_done ();
127 #ifdef HAVE_SYNC_BUILTINS
128 ret
= gomp_iter_dynamic_next (istart
, iend
);
130 gomp_mutex_lock (&thr
->ts
.work_share
->lock
);
131 ret
= gomp_iter_dynamic_next_locked (istart
, iend
);
132 gomp_mutex_unlock (&thr
->ts
.work_share
->lock
);
/* Start a loop work share with a guided schedule.

   When gomp_work_share_start (false) reports that this thread must set
   up the work share, it is initialized through gomp_loop_init with
   GFS_GUIDED and CHUNK_SIZE, followed by gomp_work_share_init_done.
   The first iteration block is then requested in one of two ways: under
   HAVE_SYNC_BUILTINS by gomp_iter_guided_next, or by
   gomp_iter_guided_next_locked while holding the work share's lock.

   NOTE(review): the return type, braces, declaration of RET, the
   directives separating the two paths and the final return are not
   visible in this listing.  Presumably the locked path is the fallback
   used when HAVE_SYNC_BUILTINS is undefined -- confirm.  */
139 gomp_loop_guided_start (long start
, long end
, long incr
, long chunk_size
,
140 long *istart
, long *iend
)
142 struct gomp_thread
*thr
= gomp_thread ();
145 if (gomp_work_share_start (false))
147 gomp_loop_init (thr
->ts
.work_share
, start
, end
, incr
,
148 GFS_GUIDED
, chunk_size
);
149 gomp_work_share_init_done ();
152 #ifdef HAVE_SYNC_BUILTINS
153 ret
= gomp_iter_guided_next (istart
, iend
);
155 gomp_mutex_lock (&thr
->ts
.work_share
->lock
);
156 ret
= gomp_iter_guided_next_locked (istart
, iend
);
157 gomp_mutex_unlock (&thr
->ts
.work_share
->lock
);
/* Start a loop whose schedule is chosen at run time.

   Reads the ICV block from gomp_icv (false) and switches on its
   run_sched_var field.  The visible returns hand off to the static,
   dynamic and guided start routines with run_sched_modifier as the chunk
   size; the last visible return calls the static routine with a chunk
   size of 0.

   NOTE(review): the return type, braces, case labels and the trailing
   ISTART/IEND arguments of the first three calls are not visible in this
   listing, and the "For now map to schedule(static)" comment is never
   terminated.  Confirm against the complete file.  */
164 GOMP_loop_runtime_start (long start
, long end
, long incr
,
165 long *istart
, long *iend
)
167 struct gomp_task_icv
*icv
= gomp_icv (false);
168 switch (icv
->run_sched_var
)
171 return gomp_loop_static_start (start
, end
, incr
, icv
->run_sched_modifier
,
174 return gomp_loop_dynamic_start (start
, end
, incr
, icv
->run_sched_modifier
,
177 return gomp_loop_guided_start (start
, end
, incr
, icv
->run_sched_modifier
,
180 /* For now map to schedule(static), later on we could play with feedback
182 return gomp_loop_static_start (start
, end
, incr
, 0, istart
, iend
);
/* The *_ordered_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED section.  */
/* Start a statically scheduled loop work share that contains an ORDERED
   section.

   Resets the calling thread's ts.static_trip field to 0.  When
   gomp_work_share_start (true) reports that this thread must set up the
   work share, it runs gomp_loop_init with GFS_STATIC, then
   gomp_ordered_static_init, then gomp_work_share_init_done.  The result
   is the logical negation of gomp_iter_static_next (ISTART, IEND).

   NOTE(review): the return type and braces are not visible in this
   listing; confirm against the complete file.  */
192 gomp_loop_ordered_static_start (long start
, long end
, long incr
,
193 long chunk_size
, long *istart
, long *iend
)
195 struct gomp_thread
*thr
= gomp_thread ();
197 thr
->ts
.static_trip
= 0;
198 if (gomp_work_share_start (true))
200 gomp_loop_init (thr
->ts
.work_share
, start
, end
, incr
,
201 GFS_STATIC
, chunk_size
);
202 gomp_ordered_static_init ();
203 gomp_work_share_init_done ();
206 return !gomp_iter_static_next (istart
, iend
);
/* Start a dynamically scheduled loop work share that contains an ORDERED
   section.

   When gomp_work_share_start (true) reports that this thread must set up
   the work share, it runs gomp_loop_init with GFS_DYNAMIC, takes the
   work share's lock and only then calls gomp_work_share_init_done.  A
   second gomp_mutex_lock call follows; presumably it is the else branch
   for threads that did not create the work share -- confirm.  With the
   lock held the routine obtains a block from
   gomp_iter_dynamic_next_locked, calls gomp_ordered_first and releases
   the lock.

   NOTE(review): the return type, braces, the else keyword, any condition
   guarding gomp_ordered_first, the declaration of RET and the final
   return are not visible in this listing.  */
210 gomp_loop_ordered_dynamic_start (long start
, long end
, long incr
,
211 long chunk_size
, long *istart
, long *iend
)
213 struct gomp_thread
*thr
= gomp_thread ();
216 if (gomp_work_share_start (true))
218 gomp_loop_init (thr
->ts
.work_share
, start
, end
, incr
,
219 GFS_DYNAMIC
, chunk_size
);
220 gomp_mutex_lock (&thr
->ts
.work_share
->lock
);
221 gomp_work_share_init_done ();
224 gomp_mutex_lock (&thr
->ts
.work_share
->lock
);
226 ret
= gomp_iter_dynamic_next_locked (istart
, iend
);
228 gomp_ordered_first ();
229 gomp_mutex_unlock (&thr
->ts
.work_share
->lock
);
/* Start a guided loop work share that contains an ORDERED section.

   When gomp_work_share_start (true) reports that this thread must set up
   the work share, it runs gomp_loop_init with GFS_GUIDED, takes the work
   share's lock and only then calls gomp_work_share_init_done.  A second
   gomp_mutex_lock call follows; presumably it is the else branch for
   threads that did not create the work share -- confirm.  With the lock
   held the routine obtains a block from gomp_iter_guided_next_locked,
   calls gomp_ordered_first and releases the lock.

   NOTE(review): the return type, braces, the else keyword, any condition
   guarding gomp_ordered_first, the declaration of RET and the final
   return are not visible in this listing.  */
235 gomp_loop_ordered_guided_start (long start
, long end
, long incr
,
236 long chunk_size
, long *istart
, long *iend
)
238 struct gomp_thread
*thr
= gomp_thread ();
241 if (gomp_work_share_start (true))
243 gomp_loop_init (thr
->ts
.work_share
, start
, end
, incr
,
244 GFS_GUIDED
, chunk_size
);
245 gomp_mutex_lock (&thr
->ts
.work_share
->lock
);
246 gomp_work_share_init_done ();
249 gomp_mutex_lock (&thr
->ts
.work_share
->lock
);
251 ret
= gomp_iter_guided_next_locked (istart
, iend
);
253 gomp_ordered_first ();
254 gomp_mutex_unlock (&thr
->ts
.work_share
->lock
);
/* Start an ORDERED loop whose schedule is chosen at run time.

   Reads the ICV block from gomp_icv (false) and switches on its
   run_sched_var field.  The visible returns hand off to the ordered
   static, dynamic and guided start routines with run_sched_modifier as
   the chunk size; a final call to the ordered static routine follows the
   "For now map to schedule(static)" comment.

   NOTE(review): the return type, braces, case labels and the trailing
   arguments of every call are not visible in this listing, and the
   "For now map to schedule(static)" comment is never terminated.
   Confirm against the complete file.  */
260 GOMP_loop_ordered_runtime_start (long start
, long end
, long incr
,
261 long *istart
, long *iend
)
263 struct gomp_task_icv
*icv
= gomp_icv (false);
264 switch (icv
->run_sched_var
)
267 return gomp_loop_ordered_static_start (start
, end
, incr
,
268 icv
->run_sched_modifier
,
271 return gomp_loop_ordered_dynamic_start (start
, end
, incr
,
272 icv
->run_sched_modifier
,
275 return gomp_loop_ordered_guided_start (start
, end
, incr
,
276 icv
->run_sched_modifier
,
279 /* For now map to schedule(static), later on we could play with feedback
281 return gomp_loop_ordered_static_start (start
, end
, incr
,
/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before the parallel.  In which case, this
   may be the first iteration for the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */
/* Fetch the next block of a statically scheduled loop.  Returns the
   logical negation of gomp_iter_static_next (ISTART, IEND), which is
   passed both output pointers unchanged.

   NOTE(review): the return type and braces are not visible in this
   listing.  */
299 gomp_loop_static_next (long *istart
, long *iend
)
301 return !gomp_iter_static_next (istart
, iend
);
/* Fetch the next block of a dynamically scheduled loop.

   Two paths are visible: under HAVE_SYNC_BUILTINS the block comes from
   gomp_iter_dynamic_next; the other path looks up the current thread and
   calls gomp_iter_dynamic_next_locked while holding the work share's
   lock.

   NOTE(review): the return type, braces, declaration of RET, the
   directives separating the two paths and the final return are not
   visible in this listing.  Presumably the locked path is the fallback
   used when HAVE_SYNC_BUILTINS is undefined -- confirm.  */
305 gomp_loop_dynamic_next (long *istart
, long *iend
)
309 #ifdef HAVE_SYNC_BUILTINS
310 ret
= gomp_iter_dynamic_next (istart
, iend
);
312 struct gomp_thread
*thr
= gomp_thread ();
313 gomp_mutex_lock (&thr
->ts
.work_share
->lock
);
314 ret
= gomp_iter_dynamic_next_locked (istart
, iend
);
315 gomp_mutex_unlock (&thr
->ts
.work_share
->lock
);
/* Fetch the next block of a guided loop.

   Two paths are visible: under HAVE_SYNC_BUILTINS the block comes from
   gomp_iter_guided_next; the other path looks up the current thread and
   calls gomp_iter_guided_next_locked while holding the work share's
   lock.

   NOTE(review): the return type, braces, declaration of RET, the
   directives separating the two paths and the final return are not
   visible in this listing.  Presumably the locked path is the fallback
   used when HAVE_SYNC_BUILTINS is undefined -- confirm.  */
322 gomp_loop_guided_next (long *istart
, long *iend
)
326 #ifdef HAVE_SYNC_BUILTINS
327 ret
= gomp_iter_guided_next (istart
, iend
);
329 struct gomp_thread
*thr
= gomp_thread ();
330 gomp_mutex_lock (&thr
->ts
.work_share
->lock
);
331 ret
= gomp_iter_guided_next_locked (istart
, iend
);
332 gomp_mutex_unlock (&thr
->ts
.work_share
->lock
);
/* Fetch the next block of a loop whose schedule was chosen at run time.
   Switches on the sched field of the calling thread's current work share
   and returns the result of the static, dynamic or guided next routine.

   NOTE(review): the return type, braces, case labels and any default
   branch are not visible in this listing.  */
339 GOMP_loop_runtime_next (long *istart
, long *iend
)
341 struct gomp_thread
*thr
= gomp_thread ();
343 switch (thr
->ts
.work_share
->sched
)
347 return gomp_loop_static_next (istart
, iend
);
349 return gomp_loop_dynamic_next (istart
, iend
);
351 return gomp_loop_guided_next (istart
, iend
);
/* The *_ordered_*_next routines are called when the thread completes
   processing of the iteration block currently assigned to it.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */
/* Fetch the next block of a statically scheduled ORDERED loop.

   Calls gomp_ordered_sync first, then takes the work share's lock,
   stores the result of gomp_iter_static_next in TEST, calls
   gomp_ordered_static_next and releases the lock.

   NOTE(review): the return type, braces, declaration of TEST, any
   condition guarding gomp_ordered_static_next and the return statement
   are not visible in this listing.  */
365 gomp_loop_ordered_static_next (long *istart
, long *iend
)
367 struct gomp_thread
*thr
= gomp_thread ();
370 gomp_ordered_sync ();
371 gomp_mutex_lock (&thr
->ts
.work_share
->lock
);
372 test
= gomp_iter_static_next (istart
, iend
);
374 gomp_ordered_static_next ();
375 gomp_mutex_unlock (&thr
->ts
.work_share
->lock
);
/* Fetch the next block of a dynamically scheduled ORDERED loop.

   Calls gomp_ordered_sync first, then takes the work share's lock,
   stores the result of gomp_iter_dynamic_next_locked in RET, and calls
   gomp_ordered_next and gomp_ordered_last before releasing the lock.

   NOTE(review): presumably only one of gomp_ordered_next and
   gomp_ordered_last runs, selected by RET, but the selecting condition
   is not visible here -- confirm.  The return type, braces, declaration
   of RET and the return statement are also not visible.  */
381 gomp_loop_ordered_dynamic_next (long *istart
, long *iend
)
383 struct gomp_thread
*thr
= gomp_thread ();
386 gomp_ordered_sync ();
387 gomp_mutex_lock (&thr
->ts
.work_share
->lock
);
388 ret
= gomp_iter_dynamic_next_locked (istart
, iend
);
390 gomp_ordered_next ();
392 gomp_ordered_last ();
393 gomp_mutex_unlock (&thr
->ts
.work_share
->lock
);
/* Fetch the next block of a guided ORDERED loop.

   Calls gomp_ordered_sync first, then takes the work share's lock,
   stores the result of gomp_iter_guided_next_locked in RET, and calls
   gomp_ordered_next and gomp_ordered_last before releasing the lock.

   NOTE(review): presumably only one of gomp_ordered_next and
   gomp_ordered_last runs, selected by RET, but the selecting condition
   is not visible here -- confirm.  The return type, braces, declaration
   of RET and the return statement are also not visible.  */
399 gomp_loop_ordered_guided_next (long *istart
, long *iend
)
401 struct gomp_thread
*thr
= gomp_thread ();
404 gomp_ordered_sync ();
405 gomp_mutex_lock (&thr
->ts
.work_share
->lock
);
406 ret
= gomp_iter_guided_next_locked (istart
, iend
);
408 gomp_ordered_next ();
410 gomp_ordered_last ();
411 gomp_mutex_unlock (&thr
->ts
.work_share
->lock
);
/* Fetch the next block of an ORDERED loop whose schedule was chosen at
   run time.  Switches on the sched field of the calling thread's current
   work share and returns the result of the ordered static, dynamic or
   guided next routine.

   NOTE(review): the return type, braces, case labels and any default
   branch are not visible in this listing.  */
417 GOMP_loop_ordered_runtime_next (long *istart
, long *iend
)
419 struct gomp_thread
*thr
= gomp_thread ();
421 switch (thr
->ts
.work_share
->sched
)
425 return gomp_loop_ordered_static_next (istart
, iend
);
427 return gomp_loop_ordered_dynamic_next (istart
, iend
);
429 return gomp_loop_ordered_guided_next (istart
, iend
);
/* The GOMP_parallel_loop_* routines pre-initialize a work-share construct
   to avoid one synchronization once we get into the loop.  */
/* Common helper for the combined parallel-loop entry points.

   Resolves NUM_THREADS through gomp_resolve_num_threads, creates a team
   of that size with gomp_new_team, initializes the team's first work
   share (work_shares[0]) with gomp_loop_init using START, END, INCR,
   SCHED and CHUNK_SIZE, and finally calls gomp_team_start with FN, DATA,
   the resolved thread count, FLAGS and the new team.

   NOTE(review): the return type and braces are not visible in this
   listing.  */
439 gomp_parallel_loop_start (void (*fn
) (void *), void *data
,
440 unsigned num_threads
, long start
, long end
,
441 long incr
, enum gomp_schedule_type sched
,
442 long chunk_size
, unsigned int flags
)
444 struct gomp_team
*team
;
446 num_threads
= gomp_resolve_num_threads (num_threads
, 0);
447 team
= gomp_new_team (num_threads
);
448 gomp_loop_init (&team
->work_shares
[0], start
, end
, incr
, sched
, chunk_size
);
449 gomp_team_start (fn
, data
, num_threads
, flags
, team
);
/* Start a parallel region whose loop uses a static schedule.  Forwards
   every argument to gomp_parallel_loop_start with GFS_STATIC as the
   schedule and 0 as the flags value.

   NOTE(review): the return type and braces are not visible in this
   listing.  */
453 GOMP_parallel_loop_static_start (void (*fn
) (void *), void *data
,
454 unsigned num_threads
, long start
, long end
,
455 long incr
, long chunk_size
)
457 gomp_parallel_loop_start (fn
, data
, num_threads
, start
, end
, incr
,
458 GFS_STATIC
, chunk_size
, 0);
/* Start a parallel region whose loop uses a dynamic schedule.  Forwards
   every argument to gomp_parallel_loop_start with GFS_DYNAMIC as the
   schedule and 0 as the flags value.

   NOTE(review): the return type and braces are not visible in this
   listing.  */
462 GOMP_parallel_loop_dynamic_start (void (*fn
) (void *), void *data
,
463 unsigned num_threads
, long start
, long end
,
464 long incr
, long chunk_size
)
466 gomp_parallel_loop_start (fn
, data
, num_threads
, start
, end
, incr
,
467 GFS_DYNAMIC
, chunk_size
, 0);
/* Start a parallel region whose loop uses a guided schedule.  Forwards
   every argument to gomp_parallel_loop_start with GFS_GUIDED as the
   schedule and 0 as the flags value.

   NOTE(review): the return type and braces are not visible in this
   listing.  */
471 GOMP_parallel_loop_guided_start (void (*fn
) (void *), void *data
,
472 unsigned num_threads
, long start
, long end
,
473 long incr
, long chunk_size
)
475 gomp_parallel_loop_start (fn
, data
, num_threads
, start
, end
, incr
,
476 GFS_GUIDED
, chunk_size
, 0);
/* Start a parallel region whose loop schedule is chosen at run time.
   Reads the ICV block from gomp_icv (false) and calls
   gomp_parallel_loop_start with run_sched_var as the schedule,
   run_sched_modifier as the chunk size and 0 as the flags value.

   NOTE(review): the parameter list is cut off after END in this listing
   although INCR is used in the call; the return type and braces are not
   visible either.  Confirm against the complete file.  */
480 GOMP_parallel_loop_runtime_start (void (*fn
) (void *), void *data
,
481 unsigned num_threads
, long start
, long end
,
484 struct gomp_task_icv
*icv
= gomp_icv (false);
485 gomp_parallel_loop_start (fn
, data
, num_threads
, start
, end
, incr
,
486 icv
->run_sched_var
, icv
->run_sched_modifier
, 0);
/* Apply the project's ialias_redirect macro to GOMP_parallel_end, which
   the routines below call.  NOTE(review): the macro is defined outside
   this file; presumably it binds those calls to an internal alias of
   GOMP_parallel_end -- confirm against its definition.  */
489 ialias_redirect (GOMP_parallel_end
)
/* Run a complete parallel region whose loop uses a static schedule.
   Calls gomp_parallel_loop_start with GFS_STATIC, CHUNK_SIZE and the
   caller's FLAGS, and later calls GOMP_parallel_end.

   NOTE(review): the embedded numbering skips a line between the two
   calls, so a statement may be missing there; the return type and braces
   are not visible either.  Confirm against the complete file.  */
492 GOMP_parallel_loop_static (void (*fn
) (void *), void *data
,
493 unsigned num_threads
, long start
, long end
,
494 long incr
, long chunk_size
, unsigned flags
)
496 gomp_parallel_loop_start (fn
, data
, num_threads
, start
, end
, incr
,
497 GFS_STATIC
, chunk_size
, flags
);
499 GOMP_parallel_end ();
/* Run a complete parallel region whose loop uses a dynamic schedule.
   Calls gomp_parallel_loop_start with GFS_DYNAMIC, CHUNK_SIZE and the
   caller's FLAGS, and later calls GOMP_parallel_end.

   NOTE(review): the embedded numbering skips a line between the two
   calls, so a statement may be missing there; the return type and braces
   are not visible either.  Confirm against the complete file.  */
503 GOMP_parallel_loop_dynamic (void (*fn
) (void *), void *data
,
504 unsigned num_threads
, long start
, long end
,
505 long incr
, long chunk_size
, unsigned flags
)
507 gomp_parallel_loop_start (fn
, data
, num_threads
, start
, end
, incr
,
508 GFS_DYNAMIC
, chunk_size
, flags
);
510 GOMP_parallel_end ();
/* Run a complete parallel region whose loop uses a guided schedule.
   Calls gomp_parallel_loop_start with GFS_GUIDED, CHUNK_SIZE and the
   caller's FLAGS, and later calls GOMP_parallel_end.

   NOTE(review): the embedded numbering skips a line between the two
   calls, so a statement may be missing there; the return type and braces
   are not visible either.  Confirm against the complete file.  */
514 GOMP_parallel_loop_guided (void (*fn
) (void *), void *data
,
515 unsigned num_threads
, long start
, long end
,
516 long incr
, long chunk_size
, unsigned flags
)
518 gomp_parallel_loop_start (fn
, data
, num_threads
, start
, end
, incr
,
519 GFS_GUIDED
, chunk_size
, flags
);
521 GOMP_parallel_end ();
/* Run a complete parallel region whose loop schedule is chosen at run
   time.  Reads the ICV block from gomp_icv (false), calls
   gomp_parallel_loop_start with run_sched_var as the schedule and
   run_sched_modifier as the chunk size, and later calls
   GOMP_parallel_end.

   NOTE(review): the end of the gomp_parallel_loop_start call (after the
   chunk-size argument) is cut off in this listing and the embedded
   numbering skips lines before GOMP_parallel_end; the return type and
   braces are not visible either.  Confirm against the complete file.  */
525 GOMP_parallel_loop_runtime (void (*fn
) (void *), void *data
,
526 unsigned num_threads
, long start
, long end
,
527 long incr
, unsigned flags
)
529 struct gomp_task_icv
*icv
= gomp_icv (false);
530 gomp_parallel_loop_start (fn
, data
, num_threads
, start
, end
, incr
,
531 icv
->run_sched_var
, icv
->run_sched_modifier
,
534 GOMP_parallel_end ();
/* The GOMP_loop_end* routines are called after the thread is told that
   all loop iterations are complete.  The first two versions synchronize
   all threads; the nowait version does not.  */
/* Ends the current work share by calling gomp_work_share_end.
   NOTE(review): the header of the enclosing function is not visible in
   this listing; presumably this is the body of GOMP_loop_end, the first
   of the loop-end routines -- confirm against the complete file.  */
544 gomp_work_share_end ();
/* End the current loop work share and return whatever
   gomp_work_share_end_cancel returns.  NOTE(review): the return type and
   braces are not visible in this listing.  */
548 GOMP_loop_end_cancel (void)
550 return gomp_work_share_end_cancel ();
/* End the current loop work share through gomp_work_share_end_nowait.
   NOTE(review): the return type and braces are not visible in this
   listing.  */
554 GOMP_loop_end_nowait (void)
556 gomp_work_share_end_nowait ();
/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.  */
/* When HAVE_ATTRIBUTE_ALIAS is defined, declare each public GOMP_loop_*
   entry point with the type of the matching gomp_loop_* routine (via
   __typeof) and bind it to that routine with the alias attribute.
   Twelve names are covered: the static, dynamic and guided variants of
   start, ordered start, next and ordered next.

   NOTE(review): the directive that closes this conditional is not
   visible in this listing.  */
565 #ifdef HAVE_ATTRIBUTE_ALIAS
566 extern __typeof(gomp_loop_static_start
) GOMP_loop_static_start
567 __attribute__((alias ("gomp_loop_static_start")));
568 extern __typeof(gomp_loop_dynamic_start
) GOMP_loop_dynamic_start
569 __attribute__((alias ("gomp_loop_dynamic_start")));
570 extern __typeof(gomp_loop_guided_start
) GOMP_loop_guided_start
571 __attribute__((alias ("gomp_loop_guided_start")));
573 extern __typeof(gomp_loop_ordered_static_start
) GOMP_loop_ordered_static_start
574 __attribute__((alias ("gomp_loop_ordered_static_start")));
575 extern __typeof(gomp_loop_ordered_dynamic_start
) GOMP_loop_ordered_dynamic_start
576 __attribute__((alias ("gomp_loop_ordered_dynamic_start")));
577 extern __typeof(gomp_loop_ordered_guided_start
) GOMP_loop_ordered_guided_start
578 __attribute__((alias ("gomp_loop_ordered_guided_start")));
580 extern __typeof(gomp_loop_static_next
) GOMP_loop_static_next
581 __attribute__((alias ("gomp_loop_static_next")));
582 extern __typeof(gomp_loop_dynamic_next
) GOMP_loop_dynamic_next
583 __attribute__((alias ("gomp_loop_dynamic_next")));
584 extern __typeof(gomp_loop_guided_next
) GOMP_loop_guided_next
585 __attribute__((alias ("gomp_loop_guided_next")));
587 extern __typeof(gomp_loop_ordered_static_next
) GOMP_loop_ordered_static_next
588 __attribute__((alias ("gomp_loop_ordered_static_next")));
589 extern __typeof(gomp_loop_ordered_dynamic_next
) GOMP_loop_ordered_dynamic_next
590 __attribute__((alias ("gomp_loop_ordered_dynamic_next")));
591 extern __typeof(gomp_loop_ordered_guided_next
) GOMP_loop_ordered_guided_next
592 __attribute__((alias ("gomp_loop_ordered_guided_next")));
/* Public wrapper: passes all six arguments unchanged to
   gomp_loop_static_start and returns its result.

   NOTE(review): presumably compiled only when HAVE_ATTRIBUTE_ALIAS is
   undefined; the directive selecting this branch, the return type and
   the braces are not visible in this listing.  */
595 GOMP_loop_static_start (long start
, long end
, long incr
, long chunk_size
,
596 long *istart
, long *iend
)
598 return gomp_loop_static_start (start
, end
, incr
, chunk_size
, istart
, iend
);
/* Public wrapper: passes all six arguments unchanged to
   gomp_loop_dynamic_start and returns its result.

   NOTE(review): presumably compiled only when HAVE_ATTRIBUTE_ALIAS is
   undefined; the directive selecting this branch, the return type and
   the braces are not visible in this listing.  */
602 GOMP_loop_dynamic_start (long start
, long end
, long incr
, long chunk_size
,
603 long *istart
, long *iend
)
605 return gomp_loop_dynamic_start (start
, end
, incr
, chunk_size
, istart
, iend
);
/* Public wrapper: passes all six arguments unchanged to
   gomp_loop_guided_start and returns its result.

   NOTE(review): presumably compiled only when HAVE_ATTRIBUTE_ALIAS is
   undefined; the directive selecting this branch, the return type and
   the braces are not visible in this listing.  */
609 GOMP_loop_guided_start (long start
, long end
, long incr
, long chunk_size
,
610 long *istart
, long *iend
)
612 return gomp_loop_guided_start (start
, end
, incr
, chunk_size
, istart
, iend
);
/* Public wrapper around gomp_loop_ordered_static_start; the visible part
   of the call forwards START, END, INCR and CHUNK_SIZE.

   NOTE(review): the call is cut off after CHUNK_SIZE in this listing, so
   the remaining arguments are not visible; the return type and braces
   are missing as well.  Presumably compiled only when
   HAVE_ATTRIBUTE_ALIAS is undefined -- confirm.  */
616 GOMP_loop_ordered_static_start (long start
, long end
, long incr
,
617 long chunk_size
, long *istart
, long *iend
)
619 return gomp_loop_ordered_static_start (start
, end
, incr
, chunk_size
,
/* Public wrapper around gomp_loop_ordered_dynamic_start; the visible
   part of the call forwards START, END, INCR and CHUNK_SIZE.

   NOTE(review): the call is cut off after CHUNK_SIZE in this listing, so
   the remaining arguments are not visible; the return type and braces
   are missing as well.  Presumably compiled only when
   HAVE_ATTRIBUTE_ALIAS is undefined -- confirm.  */
624 GOMP_loop_ordered_dynamic_start (long start
, long end
, long incr
,
625 long chunk_size
, long *istart
, long *iend
)
627 return gomp_loop_ordered_dynamic_start (start
, end
, incr
, chunk_size
,
/* Public wrapper around gomp_loop_ordered_guided_start; the visible part
   of the call forwards START, END, INCR and CHUNK_SIZE.

   NOTE(review): the call is cut off after CHUNK_SIZE in this listing, so
   the remaining arguments are not visible; the return type and braces
   are missing as well.  Presumably compiled only when
   HAVE_ATTRIBUTE_ALIAS is undefined -- confirm.  */
632 GOMP_loop_ordered_guided_start (long start
, long end
, long incr
,
633 long chunk_size
, long *istart
, long *iend
)
635 return gomp_loop_ordered_guided_start (start
, end
, incr
, chunk_size
,
/* Public wrapper: passes ISTART and IEND unchanged to
   gomp_loop_static_next and returns its result.  NOTE(review): the
   return type and braces are not visible in this listing.  */
640 GOMP_loop_static_next (long *istart
, long *iend
)
642 return gomp_loop_static_next (istart
, iend
);
/* Public wrapper: passes ISTART and IEND unchanged to
   gomp_loop_dynamic_next and returns its result.  NOTE(review): the
   return type and braces are not visible in this listing.  */
646 GOMP_loop_dynamic_next (long *istart
, long *iend
)
648 return gomp_loop_dynamic_next (istart
, iend
);
/* Public wrapper: passes ISTART and IEND unchanged to
   gomp_loop_guided_next and returns its result.  NOTE(review): the
   return type and braces are not visible in this listing.  */
652 GOMP_loop_guided_next (long *istart
, long *iend
)
654 return gomp_loop_guided_next (istart
, iend
);
/* Public wrapper: passes ISTART and IEND unchanged to
   gomp_loop_ordered_static_next and returns its result.  NOTE(review):
   the return type and braces are not visible in this listing.  */
658 GOMP_loop_ordered_static_next (long *istart
, long *iend
)
660 return gomp_loop_ordered_static_next (istart
, iend
);
/* Public wrapper: passes ISTART and IEND unchanged to
   gomp_loop_ordered_dynamic_next and returns its result.  NOTE(review):
   the return type and braces are not visible in this listing.  */
664 GOMP_loop_ordered_dynamic_next (long *istart
, long *iend
)
666 return gomp_loop_ordered_dynamic_next (istart
, iend
);
/* Public wrapper: passes ISTART and IEND unchanged to
   gomp_loop_ordered_guided_next and returns its result.  NOTE(review):
   the return type and braces are not visible in this listing.  */
670 GOMP_loop_ordered_guided_next (long *istart
, long *iend
)
672 return gomp_loop_ordered_guided_next (istart
, iend
);