1 /* Copyright (C) 2005-2015 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file handles the LOOP (FOR/DO) construct. */
/* Iteration-counter type for the unsigned long long ("_ull") loop
   routines in this file.  */
typedef unsigned long long gomp_ull;
34 /* Initialize the given work share construct from the given arguments. */
37 gomp_loop_ull_init (struct gomp_work_share
*ws
, bool up
, gomp_ull start
,
38 gomp_ull end
, gomp_ull incr
, enum gomp_schedule_type sched
,
42 ws
->chunk_size_ull
= chunk_size
;
43 /* Canonicalize loops that have zero iterations to ->next == ->end. */
44 ws
->end_ull
= ((up
&& start
> end
) || (!up
&& start
< end
))
49 if (sched
== GFS_DYNAMIC
)
51 ws
->chunk_size_ull
*= incr
;
53 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
55 /* For dynamic scheduling prepare things to make each iteration
57 struct gomp_thread
*thr
= gomp_thread ();
58 struct gomp_team
*team
= thr
->ts
.team
;
59 long nthreads
= team
? team
->nthreads
: 1;
61 if (__builtin_expect (up
, 1))
63 /* Cheap overflow protection. */
64 if (__builtin_expect ((nthreads
| ws
->chunk_size_ull
)
65 < 1ULL << (sizeof (gomp_ull
)
66 * __CHAR_BIT__
/ 2 - 1), 1))
67 ws
->mode
= ws
->end_ull
< (__LONG_LONG_MAX__
* 2ULL + 1
68 - (nthreads
+ 1) * ws
->chunk_size_ull
);
70 /* Cheap overflow protection. */
71 else if (__builtin_expect ((nthreads
| -ws
->chunk_size_ull
)
72 < 1ULL << (sizeof (gomp_ull
)
73 * __CHAR_BIT__
/ 2 - 1), 1))
74 ws
->mode
= ws
->end_ull
> ((nthreads
+ 1) * -ws
->chunk_size_ull
75 - (__LONG_LONG_MAX__
* 2ULL + 1));
83 /* The *_start routines are called when first encountering a loop construct
84 that is not bound directly to a parallel construct. The first thread
85 that arrives will create the work-share construct; subsequent threads
86 will see the construct exists and allocate work from it.
88 START, END, INCR are the bounds of the loop; due to the restrictions of
89 OpenMP, these values must be the same in every thread. This is not
90 verified (nor is it entirely verifiable, since START is not necessarily
91 retained intact in the work-share data structure). CHUNK_SIZE is the
92 scheduling parameter; again this must be identical in all threads.
94 Returns true if there's any work for this thread to perform. If so,
95 *ISTART and *IEND are filled with the bounds of the iteration block
96 allocated to this thread. Returns false if all work was assigned to
97 other threads prior to this thread's arrival. */
100 gomp_loop_ull_static_start (bool up
, gomp_ull start
, gomp_ull end
,
101 gomp_ull incr
, gomp_ull chunk_size
,
102 gomp_ull
*istart
, gomp_ull
*iend
)
104 struct gomp_thread
*thr
= gomp_thread ();
106 thr
->ts
.static_trip
= 0;
107 if (gomp_work_share_start (false))
109 gomp_loop_ull_init (thr
->ts
.work_share
, up
, start
, end
, incr
,
110 GFS_STATIC
, chunk_size
);
111 gomp_work_share_init_done ();
114 return !gomp_iter_ull_static_next (istart
, iend
);
118 gomp_loop_ull_dynamic_start (bool up
, gomp_ull start
, gomp_ull end
,
119 gomp_ull incr
, gomp_ull chunk_size
,
120 gomp_ull
*istart
, gomp_ull
*iend
)
122 struct gomp_thread
*thr
= gomp_thread ();
125 if (gomp_work_share_start (false))
127 gomp_loop_ull_init (thr
->ts
.work_share
, up
, start
, end
, incr
,
128 GFS_DYNAMIC
, chunk_size
);
129 gomp_work_share_init_done ();
132 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
133 ret
= gomp_iter_ull_dynamic_next (istart
, iend
);
135 gomp_mutex_lock (&thr
->ts
.work_share
->lock
);
136 ret
= gomp_iter_ull_dynamic_next_locked (istart
, iend
);
137 gomp_mutex_unlock (&thr
->ts
.work_share
->lock
);
144 gomp_loop_ull_guided_start (bool up
, gomp_ull start
, gomp_ull end
,
145 gomp_ull incr
, gomp_ull chunk_size
,
146 gomp_ull
*istart
, gomp_ull
*iend
)
148 struct gomp_thread
*thr
= gomp_thread ();
151 if (gomp_work_share_start (false))
153 gomp_loop_ull_init (thr
->ts
.work_share
, up
, start
, end
, incr
,
154 GFS_GUIDED
, chunk_size
);
155 gomp_work_share_init_done ();
158 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
159 ret
= gomp_iter_ull_guided_next (istart
, iend
);
161 gomp_mutex_lock (&thr
->ts
.work_share
->lock
);
162 ret
= gomp_iter_ull_guided_next_locked (istart
, iend
);
163 gomp_mutex_unlock (&thr
->ts
.work_share
->lock
);
170 GOMP_loop_ull_runtime_start (bool up
, gomp_ull start
, gomp_ull end
,
171 gomp_ull incr
, gomp_ull
*istart
, gomp_ull
*iend
)
173 struct gomp_task_icv
*icv
= gomp_icv (false);
174 switch (icv
->run_sched_var
)
177 return gomp_loop_ull_static_start (up
, start
, end
, incr
,
178 icv
->run_sched_modifier
,
181 return gomp_loop_ull_dynamic_start (up
, start
, end
, incr
,
182 icv
->run_sched_modifier
,
185 return gomp_loop_ull_guided_start (up
, start
, end
, incr
,
186 icv
->run_sched_modifier
,
189 /* For now map to schedule(static), later on we could play with feedback
191 return gomp_loop_ull_static_start (up
, start
, end
, incr
,
198 /* The *_ordered_*_start routines are similar. The only difference is that
199 this work-share construct is initialized to expect an ORDERED section. */
202 gomp_loop_ull_ordered_static_start (bool up
, gomp_ull start
, gomp_ull end
,
203 gomp_ull incr
, gomp_ull chunk_size
,
204 gomp_ull
*istart
, gomp_ull
*iend
)
206 struct gomp_thread
*thr
= gomp_thread ();
208 thr
->ts
.static_trip
= 0;
209 if (gomp_work_share_start (true))
211 gomp_loop_ull_init (thr
->ts
.work_share
, up
, start
, end
, incr
,
212 GFS_STATIC
, chunk_size
);
213 gomp_ordered_static_init ();
214 gomp_work_share_init_done ();
217 return !gomp_iter_ull_static_next (istart
, iend
);
221 gomp_loop_ull_ordered_dynamic_start (bool up
, gomp_ull start
, gomp_ull end
,
222 gomp_ull incr
, gomp_ull chunk_size
,
223 gomp_ull
*istart
, gomp_ull
*iend
)
225 struct gomp_thread
*thr
= gomp_thread ();
228 if (gomp_work_share_start (true))
230 gomp_loop_ull_init (thr
->ts
.work_share
, up
, start
, end
, incr
,
231 GFS_DYNAMIC
, chunk_size
);
232 gomp_mutex_lock (&thr
->ts
.work_share
->lock
);
233 gomp_work_share_init_done ();
236 gomp_mutex_lock (&thr
->ts
.work_share
->lock
);
238 ret
= gomp_iter_ull_dynamic_next_locked (istart
, iend
);
240 gomp_ordered_first ();
241 gomp_mutex_unlock (&thr
->ts
.work_share
->lock
);
247 gomp_loop_ull_ordered_guided_start (bool up
, gomp_ull start
, gomp_ull end
,
248 gomp_ull incr
, gomp_ull chunk_size
,
249 gomp_ull
*istart
, gomp_ull
*iend
)
251 struct gomp_thread
*thr
= gomp_thread ();
254 if (gomp_work_share_start (true))
256 gomp_loop_ull_init (thr
->ts
.work_share
, up
, start
, end
, incr
,
257 GFS_GUIDED
, chunk_size
);
258 gomp_mutex_lock (&thr
->ts
.work_share
->lock
);
259 gomp_work_share_init_done ();
262 gomp_mutex_lock (&thr
->ts
.work_share
->lock
);
264 ret
= gomp_iter_ull_guided_next_locked (istart
, iend
);
266 gomp_ordered_first ();
267 gomp_mutex_unlock (&thr
->ts
.work_share
->lock
);
273 GOMP_loop_ull_ordered_runtime_start (bool up
, gomp_ull start
, gomp_ull end
,
274 gomp_ull incr
, gomp_ull
*istart
,
277 struct gomp_task_icv
*icv
= gomp_icv (false);
278 switch (icv
->run_sched_var
)
281 return gomp_loop_ull_ordered_static_start (up
, start
, end
, incr
,
282 icv
->run_sched_modifier
,
285 return gomp_loop_ull_ordered_dynamic_start (up
, start
, end
, incr
,
286 icv
->run_sched_modifier
,
289 return gomp_loop_ull_ordered_guided_start (up
, start
, end
, incr
,
290 icv
->run_sched_modifier
,
293 /* For now map to schedule(static), later on we could play with feedback
295 return gomp_loop_ull_ordered_static_start (up
, start
, end
, incr
,
302 /* The *_next routines are called when the thread completes processing of
303 the iteration block currently assigned to it. If the work-share
304 construct is bound directly to a parallel construct, then the iteration
305 bounds may have been set up before the parallel. In which case, this
306 may be the first iteration for the thread.
308 Returns true if there is work remaining to be performed; *ISTART and
309 *IEND are filled with a new iteration block. Returns false if all work
310 has been assigned. */
313 gomp_loop_ull_static_next (gomp_ull
*istart
, gomp_ull
*iend
)
315 return !gomp_iter_ull_static_next (istart
, iend
);
319 gomp_loop_ull_dynamic_next (gomp_ull
*istart
, gomp_ull
*iend
)
323 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
324 ret
= gomp_iter_ull_dynamic_next (istart
, iend
);
326 struct gomp_thread
*thr
= gomp_thread ();
327 gomp_mutex_lock (&thr
->ts
.work_share
->lock
);
328 ret
= gomp_iter_ull_dynamic_next_locked (istart
, iend
);
329 gomp_mutex_unlock (&thr
->ts
.work_share
->lock
);
336 gomp_loop_ull_guided_next (gomp_ull
*istart
, gomp_ull
*iend
)
340 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
341 ret
= gomp_iter_ull_guided_next (istart
, iend
);
343 struct gomp_thread
*thr
= gomp_thread ();
344 gomp_mutex_lock (&thr
->ts
.work_share
->lock
);
345 ret
= gomp_iter_ull_guided_next_locked (istart
, iend
);
346 gomp_mutex_unlock (&thr
->ts
.work_share
->lock
);
353 GOMP_loop_ull_runtime_next (gomp_ull
*istart
, gomp_ull
*iend
)
355 struct gomp_thread
*thr
= gomp_thread ();
357 switch (thr
->ts
.work_share
->sched
)
361 return gomp_loop_ull_static_next (istart
, iend
);
363 return gomp_loop_ull_dynamic_next (istart
, iend
);
365 return gomp_loop_ull_guided_next (istart
, iend
);
371 /* The *_ordered_*_next routines are called when the thread completes
372 processing of the iteration block currently assigned to it.
374 Returns true if there is work remaining to be performed; *ISTART and
375 *IEND are filled with a new iteration block. Returns false if all work
376 has been assigned. */
379 gomp_loop_ull_ordered_static_next (gomp_ull
*istart
, gomp_ull
*iend
)
381 struct gomp_thread
*thr
= gomp_thread ();
384 gomp_ordered_sync ();
385 gomp_mutex_lock (&thr
->ts
.work_share
->lock
);
386 test
= gomp_iter_ull_static_next (istart
, iend
);
388 gomp_ordered_static_next ();
389 gomp_mutex_unlock (&thr
->ts
.work_share
->lock
);
395 gomp_loop_ull_ordered_dynamic_next (gomp_ull
*istart
, gomp_ull
*iend
)
397 struct gomp_thread
*thr
= gomp_thread ();
400 gomp_ordered_sync ();
401 gomp_mutex_lock (&thr
->ts
.work_share
->lock
);
402 ret
= gomp_iter_ull_dynamic_next_locked (istart
, iend
);
404 gomp_ordered_next ();
406 gomp_ordered_last ();
407 gomp_mutex_unlock (&thr
->ts
.work_share
->lock
);
413 gomp_loop_ull_ordered_guided_next (gomp_ull
*istart
, gomp_ull
*iend
)
415 struct gomp_thread
*thr
= gomp_thread ();
418 gomp_ordered_sync ();
419 gomp_mutex_lock (&thr
->ts
.work_share
->lock
);
420 ret
= gomp_iter_ull_guided_next_locked (istart
, iend
);
422 gomp_ordered_next ();
424 gomp_ordered_last ();
425 gomp_mutex_unlock (&thr
->ts
.work_share
->lock
);
431 GOMP_loop_ull_ordered_runtime_next (gomp_ull
*istart
, gomp_ull
*iend
)
433 struct gomp_thread
*thr
= gomp_thread ();
435 switch (thr
->ts
.work_share
->sched
)
439 return gomp_loop_ull_ordered_static_next (istart
, iend
);
441 return gomp_loop_ull_ordered_dynamic_next (istart
, iend
);
443 return gomp_loop_ull_ordered_guided_next (istart
, iend
);
449 /* We use static functions above so that we're sure that the "runtime"
450 function can defer to the proper routine without interposition. We
451 export the static function with a strong alias when possible, or with
452 a wrapper function otherwise. */
454 #ifdef HAVE_ATTRIBUTE_ALIAS
455 extern __typeof(gomp_loop_ull_static_start
) GOMP_loop_ull_static_start
456 __attribute__((alias ("gomp_loop_ull_static_start")));
457 extern __typeof(gomp_loop_ull_dynamic_start
) GOMP_loop_ull_dynamic_start
458 __attribute__((alias ("gomp_loop_ull_dynamic_start")));
459 extern __typeof(gomp_loop_ull_guided_start
) GOMP_loop_ull_guided_start
460 __attribute__((alias ("gomp_loop_ull_guided_start")));
462 extern __typeof(gomp_loop_ull_ordered_static_start
) GOMP_loop_ull_ordered_static_start
463 __attribute__((alias ("gomp_loop_ull_ordered_static_start")));
464 extern __typeof(gomp_loop_ull_ordered_dynamic_start
) GOMP_loop_ull_ordered_dynamic_start
465 __attribute__((alias ("gomp_loop_ull_ordered_dynamic_start")));
466 extern __typeof(gomp_loop_ull_ordered_guided_start
) GOMP_loop_ull_ordered_guided_start
467 __attribute__((alias ("gomp_loop_ull_ordered_guided_start")));
469 extern __typeof(gomp_loop_ull_static_next
) GOMP_loop_ull_static_next
470 __attribute__((alias ("gomp_loop_ull_static_next")));
471 extern __typeof(gomp_loop_ull_dynamic_next
) GOMP_loop_ull_dynamic_next
472 __attribute__((alias ("gomp_loop_ull_dynamic_next")));
473 extern __typeof(gomp_loop_ull_guided_next
) GOMP_loop_ull_guided_next
474 __attribute__((alias ("gomp_loop_ull_guided_next")));
476 extern __typeof(gomp_loop_ull_ordered_static_next
) GOMP_loop_ull_ordered_static_next
477 __attribute__((alias ("gomp_loop_ull_ordered_static_next")));
478 extern __typeof(gomp_loop_ull_ordered_dynamic_next
) GOMP_loop_ull_ordered_dynamic_next
479 __attribute__((alias ("gomp_loop_ull_ordered_dynamic_next")));
480 extern __typeof(gomp_loop_ull_ordered_guided_next
) GOMP_loop_ull_ordered_guided_next
481 __attribute__((alias ("gomp_loop_ull_ordered_guided_next")));
484 GOMP_loop_ull_static_start (bool up
, gomp_ull start
, gomp_ull end
,
485 gomp_ull incr
, gomp_ull chunk_size
,
486 gomp_ull
*istart
, gomp_ull
*iend
)
488 return gomp_loop_ull_static_start (up
, start
, end
, incr
, chunk_size
, istart
,
493 GOMP_loop_ull_dynamic_start (bool up
, gomp_ull start
, gomp_ull end
,
494 gomp_ull incr
, gomp_ull chunk_size
,
495 gomp_ull
*istart
, gomp_ull
*iend
)
497 return gomp_loop_ull_dynamic_start (up
, start
, end
, incr
, chunk_size
, istart
,
502 GOMP_loop_ull_guided_start (bool up
, gomp_ull start
, gomp_ull end
,
503 gomp_ull incr
, gomp_ull chunk_size
,
504 gomp_ull
*istart
, gomp_ull
*iend
)
506 return gomp_loop_ull_guided_start (up
, start
, end
, incr
, chunk_size
, istart
,
511 GOMP_loop_ull_ordered_static_start (bool up
, gomp_ull start
, gomp_ull end
,
512 gomp_ull incr
, gomp_ull chunk_size
,
513 gomp_ull
*istart
, gomp_ull
*iend
)
515 return gomp_loop_ull_ordered_static_start (up
, start
, end
, incr
, chunk_size
,
520 GOMP_loop_ull_ordered_dynamic_start (bool up
, gomp_ull start
, gomp_ull end
,
521 gomp_ull incr
, gomp_ull chunk_size
,
522 gomp_ull
*istart
, gomp_ull
*iend
)
524 return gomp_loop_ull_ordered_dynamic_start (up
, start
, end
, incr
, chunk_size
,
529 GOMP_loop_ull_ordered_guided_start (bool up
, gomp_ull start
, gomp_ull end
,
530 gomp_ull incr
, gomp_ull chunk_size
,
531 gomp_ull
*istart
, gomp_ull
*iend
)
533 return gomp_loop_ull_ordered_guided_start (up
, start
, end
, incr
, chunk_size
,
538 GOMP_loop_ull_static_next (gomp_ull
*istart
, gomp_ull
*iend
)
540 return gomp_loop_ull_static_next (istart
, iend
);
544 GOMP_loop_ull_dynamic_next (gomp_ull
*istart
, gomp_ull
*iend
)
546 return gomp_loop_ull_dynamic_next (istart
, iend
);
550 GOMP_loop_ull_guided_next (gomp_ull
*istart
, gomp_ull
*iend
)
552 return gomp_loop_ull_guided_next (istart
, iend
);
556 GOMP_loop_ull_ordered_static_next (gomp_ull
*istart
, gomp_ull
*iend
)
558 return gomp_loop_ull_ordered_static_next (istart
, iend
);
562 GOMP_loop_ull_ordered_dynamic_next (gomp_ull
*istart
, gomp_ull
*iend
)
564 return gomp_loop_ull_ordered_dynamic_next (istart
, iend
);
568 GOMP_loop_ull_ordered_guided_next (gomp_ull
*istart
, gomp_ull
*iend
)
570 return gomp_loop_ull_ordered_guided_next (istart
, iend
);