PR target/65871
[official-gcc.git] / libgomp / loop_ull.c
blobde56ae0b7ced9ffb356741c7b902c62a97b5200c
/* Copyright (C) 2005-2015 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
26 /* This file handles the LOOP (FOR/DO) construct. */
28 #include <limits.h>
29 #include <stdlib.h>
30 #include "libgomp.h"
32 typedef unsigned long long gomp_ull;
34 /* Initialize the given work share construct from the given arguments. */
36 static inline void
37 gomp_loop_ull_init (struct gomp_work_share *ws, bool up, gomp_ull start,
38 gomp_ull end, gomp_ull incr, enum gomp_schedule_type sched,
39 gomp_ull chunk_size)
41 ws->sched = sched;
42 ws->chunk_size_ull = chunk_size;
43 /* Canonicalize loops that have zero iterations to ->next == ->end. */
44 ws->end_ull = ((up && start > end) || (!up && start < end))
45 ? start : end;
46 ws->incr_ull = incr;
47 ws->next_ull = start;
48 ws->mode = 0;
49 if (sched == GFS_DYNAMIC)
51 ws->chunk_size_ull *= incr;
53 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
55 /* For dynamic scheduling prepare things to make each iteration
56 faster. */
57 struct gomp_thread *thr = gomp_thread ();
58 struct gomp_team *team = thr->ts.team;
59 long nthreads = team ? team->nthreads : 1;
61 if (__builtin_expect (up, 1))
63 /* Cheap overflow protection. */
64 if (__builtin_expect ((nthreads | ws->chunk_size_ull)
65 < 1ULL << (sizeof (gomp_ull)
66 * __CHAR_BIT__ / 2 - 1), 1))
67 ws->mode = ws->end_ull < (__LONG_LONG_MAX__ * 2ULL + 1
68 - (nthreads + 1) * ws->chunk_size_ull);
70 /* Cheap overflow protection. */
71 else if (__builtin_expect ((nthreads | -ws->chunk_size_ull)
72 < 1ULL << (sizeof (gomp_ull)
73 * __CHAR_BIT__ / 2 - 1), 1))
74 ws->mode = ws->end_ull > ((nthreads + 1) * -ws->chunk_size_ull
75 - (__LONG_LONG_MAX__ * 2ULL + 1));
77 #endif
79 if (!up)
80 ws->mode |= 2;
/* The *_start routines are called when first encountering a loop construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see the construct exists and allocate work from it.

   START, END, INCR are the bounds of the loop; due to the restrictions of
   OpenMP, these values must be the same in every thread.  This is not
   verified (nor is it entirely verifiable, since START is not necessarily
   retained intact in the work-share data structure).  CHUNK_SIZE is the
   scheduling parameter; again this must be identical in all threads.

   Returns true if there's any work for this thread to perform.  If so,
   *ISTART and *IEND are filled with the bounds of the iteration block
   allocated to this thread.  Returns false if all work was assigned to
   other threads prior to this thread's arrival.  */
99 static bool
100 gomp_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
101 gomp_ull incr, gomp_ull chunk_size,
102 gomp_ull *istart, gomp_ull *iend)
104 struct gomp_thread *thr = gomp_thread ();
106 thr->ts.static_trip = 0;
107 if (gomp_work_share_start (false))
109 gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
110 GFS_STATIC, chunk_size);
111 gomp_work_share_init_done ();
114 return !gomp_iter_ull_static_next (istart, iend);
117 static bool
118 gomp_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
119 gomp_ull incr, gomp_ull chunk_size,
120 gomp_ull *istart, gomp_ull *iend)
122 struct gomp_thread *thr = gomp_thread ();
123 bool ret;
125 if (gomp_work_share_start (false))
127 gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
128 GFS_DYNAMIC, chunk_size);
129 gomp_work_share_init_done ();
132 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
133 ret = gomp_iter_ull_dynamic_next (istart, iend);
134 #else
135 gomp_mutex_lock (&thr->ts.work_share->lock);
136 ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
137 gomp_mutex_unlock (&thr->ts.work_share->lock);
138 #endif
140 return ret;
143 static bool
144 gomp_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
145 gomp_ull incr, gomp_ull chunk_size,
146 gomp_ull *istart, gomp_ull *iend)
148 struct gomp_thread *thr = gomp_thread ();
149 bool ret;
151 if (gomp_work_share_start (false))
153 gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
154 GFS_GUIDED, chunk_size);
155 gomp_work_share_init_done ();
158 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
159 ret = gomp_iter_ull_guided_next (istart, iend);
160 #else
161 gomp_mutex_lock (&thr->ts.work_share->lock);
162 ret = gomp_iter_ull_guided_next_locked (istart, iend);
163 gomp_mutex_unlock (&thr->ts.work_share->lock);
164 #endif
166 return ret;
169 bool
170 GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end,
171 gomp_ull incr, gomp_ull *istart, gomp_ull *iend)
173 struct gomp_task_icv *icv = gomp_icv (false);
174 switch (icv->run_sched_var)
176 case GFS_STATIC:
177 return gomp_loop_ull_static_start (up, start, end, incr,
178 icv->run_sched_modifier,
179 istart, iend);
180 case GFS_DYNAMIC:
181 return gomp_loop_ull_dynamic_start (up, start, end, incr,
182 icv->run_sched_modifier,
183 istart, iend);
184 case GFS_GUIDED:
185 return gomp_loop_ull_guided_start (up, start, end, incr,
186 icv->run_sched_modifier,
187 istart, iend);
188 case GFS_AUTO:
189 /* For now map to schedule(static), later on we could play with feedback
190 driven choice. */
191 return gomp_loop_ull_static_start (up, start, end, incr,
192 0, istart, iend);
193 default:
194 abort ();
/* The *_ordered_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED section.  */
201 static bool
202 gomp_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
203 gomp_ull incr, gomp_ull chunk_size,
204 gomp_ull *istart, gomp_ull *iend)
206 struct gomp_thread *thr = gomp_thread ();
208 thr->ts.static_trip = 0;
209 if (gomp_work_share_start (true))
211 gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
212 GFS_STATIC, chunk_size);
213 gomp_ordered_static_init ();
214 gomp_work_share_init_done ();
217 return !gomp_iter_ull_static_next (istart, iend);
220 static bool
221 gomp_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
222 gomp_ull incr, gomp_ull chunk_size,
223 gomp_ull *istart, gomp_ull *iend)
225 struct gomp_thread *thr = gomp_thread ();
226 bool ret;
228 if (gomp_work_share_start (true))
230 gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
231 GFS_DYNAMIC, chunk_size);
232 gomp_mutex_lock (&thr->ts.work_share->lock);
233 gomp_work_share_init_done ();
235 else
236 gomp_mutex_lock (&thr->ts.work_share->lock);
238 ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
239 if (ret)
240 gomp_ordered_first ();
241 gomp_mutex_unlock (&thr->ts.work_share->lock);
243 return ret;
246 static bool
247 gomp_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
248 gomp_ull incr, gomp_ull chunk_size,
249 gomp_ull *istart, gomp_ull *iend)
251 struct gomp_thread *thr = gomp_thread ();
252 bool ret;
254 if (gomp_work_share_start (true))
256 gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
257 GFS_GUIDED, chunk_size);
258 gomp_mutex_lock (&thr->ts.work_share->lock);
259 gomp_work_share_init_done ();
261 else
262 gomp_mutex_lock (&thr->ts.work_share->lock);
264 ret = gomp_iter_ull_guided_next_locked (istart, iend);
265 if (ret)
266 gomp_ordered_first ();
267 gomp_mutex_unlock (&thr->ts.work_share->lock);
269 return ret;
272 bool
273 GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end,
274 gomp_ull incr, gomp_ull *istart,
275 gomp_ull *iend)
277 struct gomp_task_icv *icv = gomp_icv (false);
278 switch (icv->run_sched_var)
280 case GFS_STATIC:
281 return gomp_loop_ull_ordered_static_start (up, start, end, incr,
282 icv->run_sched_modifier,
283 istart, iend);
284 case GFS_DYNAMIC:
285 return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr,
286 icv->run_sched_modifier,
287 istart, iend);
288 case GFS_GUIDED:
289 return gomp_loop_ull_ordered_guided_start (up, start, end, incr,
290 icv->run_sched_modifier,
291 istart, iend);
292 case GFS_AUTO:
293 /* For now map to schedule(static), later on we could play with feedback
294 driven choice. */
295 return gomp_loop_ull_ordered_static_start (up, start, end, incr,
296 0, istart, iend);
297 default:
298 abort ();
/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before the parallel.  In which case, this
   may be the first iteration for the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */
312 static bool
313 gomp_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
315 return !gomp_iter_ull_static_next (istart, iend);
318 static bool
319 gomp_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
321 bool ret;
323 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
324 ret = gomp_iter_ull_dynamic_next (istart, iend);
325 #else
326 struct gomp_thread *thr = gomp_thread ();
327 gomp_mutex_lock (&thr->ts.work_share->lock);
328 ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
329 gomp_mutex_unlock (&thr->ts.work_share->lock);
330 #endif
332 return ret;
335 static bool
336 gomp_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
338 bool ret;
340 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
341 ret = gomp_iter_ull_guided_next (istart, iend);
342 #else
343 struct gomp_thread *thr = gomp_thread ();
344 gomp_mutex_lock (&thr->ts.work_share->lock);
345 ret = gomp_iter_ull_guided_next_locked (istart, iend);
346 gomp_mutex_unlock (&thr->ts.work_share->lock);
347 #endif
349 return ret;
352 bool
353 GOMP_loop_ull_runtime_next (gomp_ull *istart, gomp_ull *iend)
355 struct gomp_thread *thr = gomp_thread ();
357 switch (thr->ts.work_share->sched)
359 case GFS_STATIC:
360 case GFS_AUTO:
361 return gomp_loop_ull_static_next (istart, iend);
362 case GFS_DYNAMIC:
363 return gomp_loop_ull_dynamic_next (istart, iend);
364 case GFS_GUIDED:
365 return gomp_loop_ull_guided_next (istart, iend);
366 default:
367 abort ();
/* The *_ordered_*_next routines are called when the thread completes
   processing of the iteration block currently assigned to it.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */
378 static bool
379 gomp_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
381 struct gomp_thread *thr = gomp_thread ();
382 int test;
384 gomp_ordered_sync ();
385 gomp_mutex_lock (&thr->ts.work_share->lock);
386 test = gomp_iter_ull_static_next (istart, iend);
387 if (test >= 0)
388 gomp_ordered_static_next ();
389 gomp_mutex_unlock (&thr->ts.work_share->lock);
391 return test == 0;
394 static bool
395 gomp_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
397 struct gomp_thread *thr = gomp_thread ();
398 bool ret;
400 gomp_ordered_sync ();
401 gomp_mutex_lock (&thr->ts.work_share->lock);
402 ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
403 if (ret)
404 gomp_ordered_next ();
405 else
406 gomp_ordered_last ();
407 gomp_mutex_unlock (&thr->ts.work_share->lock);
409 return ret;
412 static bool
413 gomp_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
415 struct gomp_thread *thr = gomp_thread ();
416 bool ret;
418 gomp_ordered_sync ();
419 gomp_mutex_lock (&thr->ts.work_share->lock);
420 ret = gomp_iter_ull_guided_next_locked (istart, iend);
421 if (ret)
422 gomp_ordered_next ();
423 else
424 gomp_ordered_last ();
425 gomp_mutex_unlock (&thr->ts.work_share->lock);
427 return ret;
430 bool
431 GOMP_loop_ull_ordered_runtime_next (gomp_ull *istart, gomp_ull *iend)
433 struct gomp_thread *thr = gomp_thread ();
435 switch (thr->ts.work_share->sched)
437 case GFS_STATIC:
438 case GFS_AUTO:
439 return gomp_loop_ull_ordered_static_next (istart, iend);
440 case GFS_DYNAMIC:
441 return gomp_loop_ull_ordered_dynamic_next (istart, iend);
442 case GFS_GUIDED:
443 return gomp_loop_ull_ordered_guided_next (istart, iend);
444 default:
445 abort ();
/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.  */
454 #ifdef HAVE_ATTRIBUTE_ALIAS
455 extern __typeof(gomp_loop_ull_static_start) GOMP_loop_ull_static_start
456 __attribute__((alias ("gomp_loop_ull_static_start")));
457 extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_dynamic_start
458 __attribute__((alias ("gomp_loop_ull_dynamic_start")));
459 extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start
460 __attribute__((alias ("gomp_loop_ull_guided_start")));
462 extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start
463 __attribute__((alias ("gomp_loop_ull_ordered_static_start")));
464 extern __typeof(gomp_loop_ull_ordered_dynamic_start) GOMP_loop_ull_ordered_dynamic_start
465 __attribute__((alias ("gomp_loop_ull_ordered_dynamic_start")));
466 extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start
467 __attribute__((alias ("gomp_loop_ull_ordered_guided_start")));
469 extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next
470 __attribute__((alias ("gomp_loop_ull_static_next")));
471 extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next
472 __attribute__((alias ("gomp_loop_ull_dynamic_next")));
473 extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next
474 __attribute__((alias ("gomp_loop_ull_guided_next")));
476 extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next
477 __attribute__((alias ("gomp_loop_ull_ordered_static_next")));
478 extern __typeof(gomp_loop_ull_ordered_dynamic_next) GOMP_loop_ull_ordered_dynamic_next
479 __attribute__((alias ("gomp_loop_ull_ordered_dynamic_next")));
480 extern __typeof(gomp_loop_ull_ordered_guided_next) GOMP_loop_ull_ordered_guided_next
481 __attribute__((alias ("gomp_loop_ull_ordered_guided_next")));
482 #else
483 bool
484 GOMP_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
485 gomp_ull incr, gomp_ull chunk_size,
486 gomp_ull *istart, gomp_ull *iend)
488 return gomp_loop_ull_static_start (up, start, end, incr, chunk_size, istart,
489 iend);
492 bool
493 GOMP_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
494 gomp_ull incr, gomp_ull chunk_size,
495 gomp_ull *istart, gomp_ull *iend)
497 return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
498 iend);
501 bool
502 GOMP_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
503 gomp_ull incr, gomp_ull chunk_size,
504 gomp_ull *istart, gomp_ull *iend)
506 return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
507 iend);
510 bool
511 GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
512 gomp_ull incr, gomp_ull chunk_size,
513 gomp_ull *istart, gomp_ull *iend)
515 return gomp_loop_ull_ordered_static_start (up, start, end, incr, chunk_size,
516 istart, iend);
519 bool
520 GOMP_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
521 gomp_ull incr, gomp_ull chunk_size,
522 gomp_ull *istart, gomp_ull *iend)
524 return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, chunk_size,
525 istart, iend);
528 bool
529 GOMP_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
530 gomp_ull incr, gomp_ull chunk_size,
531 gomp_ull *istart, gomp_ull *iend)
533 return gomp_loop_ull_ordered_guided_start (up, start, end, incr, chunk_size,
534 istart, iend);
537 bool
538 GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
540 return gomp_loop_ull_static_next (istart, iend);
543 bool
544 GOMP_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
546 return gomp_loop_ull_dynamic_next (istart, iend);
549 bool
550 GOMP_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
552 return gomp_loop_ull_guided_next (istart, iend);
555 bool
556 GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
558 return gomp_loop_ull_ordered_static_next (istart, iend);
561 bool
562 GOMP_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
564 return gomp_loop_ull_ordered_dynamic_next (istart, iend);
567 bool
568 GOMP_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
570 return gomp_loop_ull_ordered_guided_next (istart, iend);
572 #endif