/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 2.1 of the License, or
   (at your option) any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
   more details.

   You should have received a copy of the GNU Lesser General Public License
   along with libgomp; see the file COPYING.LIB.  If not, write to the
   Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
   MA 02110-1301, USA.  */

/* As a special exception, if you link this library with other files, some
   of which are compiled with GCC, to produce an executable, this library
   does not by itself cause the resulting executable to be covered by the
   GNU General Public License.  This exception does not however invalidate
   any other reasons why the executable file might be covered by the GNU
   General Public License.  */

/* This file handles the LOOP (FOR/DO) construct.  */

#include <limits.h>
#include <stdlib.h>
#include "libgomp.h"

typedef unsigned long long gomp_ull;

/* Initialize the given work share construct from the given arguments.  */

static inline void
gomp_loop_ull_init (struct gomp_work_share *ws, bool up, gomp_ull start,
		    gomp_ull end, gomp_ull incr, enum gomp_schedule_type sched,
		    gomp_ull chunk_size)
{
  ws->sched = sched;
  ws->chunk_size_ull = chunk_size;
  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  ws->end_ull = ((up && start > end) || (!up && start < end))
		? start : end;
  ws->incr_ull = incr;
  ws->next_ull = start;
  ws->mode = 0;
  if (sched == GFS_DYNAMIC)
    {
      ws->chunk_size_ull *= incr;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
      {
	/* For dynamic scheduling prepare things to make each iteration
	   faster.  */
	struct gomp_thread *thr = gomp_thread ();
	struct gomp_team *team = thr->ts.team;
	long nthreads = team ? team->nthreads : 1;

	if (__builtin_expect (up, 1))
	  {
	    /* Cheap overflow protection.  */
	    if (__builtin_expect ((nthreads | ws->chunk_size_ull)
				  < 1ULL << (sizeof (gomp_ull)
					     * __CHAR_BIT__ / 2 - 1), 1))
	      ws->mode = ws->end_ull < (__LONG_LONG_MAX__ * 2ULL + 1
					- (nthreads + 1) * ws->chunk_size_ull);
	  }
	/* Cheap overflow protection.  */
	else if (__builtin_expect ((nthreads | -ws->chunk_size_ull)
				   < 1ULL << (sizeof (gomp_ull)
					      * __CHAR_BIT__ / 2 - 1), 1))
	  ws->mode = ws->end_ull > ((nthreads + 1) * -ws->chunk_size_ull
				    - (__LONG_LONG_MAX__ * 2ULL + 1));
      }
#endif
    }
  if (!up)
    ws->mode |= 2;
}
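
/* A note on WS->MODE, inferred from the companion iterators in iter_ull.c
   rather than spelled out here: the low bit records that the bounds are far
   enough from the wraparound point of gomp_ull that the dynamic iterator can
   hand out chunks with a plain atomic fetch-and-add, without per-call
   overflow checks; bit 1, set just above for !UP, marks a downward loop.
   Treat this as a sketch of intent; the authoritative tests live in
   iter_ull.c.  */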

/* The *_start routines are called when first encountering a loop construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see the construct exists and allocate work from it.

   START, END, INCR are the bounds of the loop; due to the restrictions of
   OpenMP, these values must be the same in every thread.  This is not
   verified (nor is it entirely verifiable, since START is not necessarily
   retained intact in the work-share data structure).  CHUNK_SIZE is the
   scheduling parameter; again this must be identical in all threads.

   Returns true if there's any work for this thread to perform.  If so,
   *ISTART and *IEND are filled with the bounds of the iteration block
   allocated to this thread.  Returns false if all work was assigned to
   other threads prior to this thread's arrival.  */
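
/* As an illustrative sketch (assumed here, not defined in this file): for a
   loop such as

	#pragma omp for schedule(dynamic, 16)
	for (unsigned long long i = 0; i < n; i++)
	  body (i);

   the compiler emits code along the lines of

	gomp_ull istart, iend, i;
	if (GOMP_loop_ull_dynamic_start (true, 0, n, 1, 16, &istart, &iend))
	  do
	    {
	      for (i = istart; i < iend; i++)
		body (i);
	    }
	  while (GOMP_loop_ull_dynamic_next (&istart, &iend));
	GOMP_loop_end ();

   where GOMP_loop_end comes from loop.c.  */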

static bool
gomp_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (false))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_STATIC, chunk_size);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, gomp_ull chunk_size,
			     gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_DYNAMIC, chunk_size);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}
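
/* A note on the two paths above: with HAVE_SYNC_BUILTINS on an LP64 target,
   gomp_iter_ull_dynamic_next claims a chunk with lock-free atomics, so the
   work-share lock is never taken; otherwise the *_locked variant runs under
   that lock.  The same split recurs in the guided start and *_next routines
   below.  */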

static bool
gomp_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_GUIDED, chunk_size);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var)
    {
    case GFS_STATIC:
      return gomp_loop_ull_static_start (up, start, end, incr,
					 icv->run_sched_modifier,
					 istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_dynamic_start (up, start, end, incr,
					  icv->run_sched_modifier,
					  istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_guided_start (up, start, end, incr,
					 icv->run_sched_modifier,
					 istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
	 driven choice.  */
      return gomp_loop_ull_static_start (up, start, end, incr,
					 0, istart, iend);
    default:
      abort ();
    }
}
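
/* The run_sched_var and run_sched_modifier ICVs consulted above are normally
   seeded from the OMP_SCHEDULE environment variable or omp_set_schedule;
   e.g. OMP_SCHEDULE="guided,7" makes every schedule(runtime) loop use
   GFS_GUIDED with a chunk size of 7.  */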

/* The *_ordered_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED section.  */
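
/* A sketch of the expected call pattern (assumed, not defined in this file):
   within the chunk loop, each thread brackets its ordered region with
   GOMP_ordered_start () / GOMP_ordered_end () from ordered.c, which block
   until the owner of the lowest outstanding chunk has passed through, so
   ordered bodies still execute in sequential iteration order.  */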

static bool
gomp_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (true))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_STATIC, chunk_size);
      gomp_ordered_static_init ();
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
				     gomp_ull incr, gomp_ull chunk_size,
				     gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (true))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_DYNAMIC, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (true))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_GUIDED, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end,
				     gomp_ull incr, gomp_ull *istart,
				     gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var)
    {
    case GFS_STATIC:
      return gomp_loop_ull_ordered_static_start (up, start, end, incr,
						 icv->run_sched_modifier,
						 istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr,
						  icv->run_sched_modifier,
						  istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_ordered_guided_start (up, start, end, incr,
						 icv->run_sched_modifier,
						 istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
	 driven choice.  */
      return gomp_loop_ull_ordered_static_start (up, start, end, incr,
						 0, istart, iend);
    default:
      abort ();
    }
}

/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before the parallel.  In which case, this
   may be the first iteration for the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  bool ret;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  bool ret;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ull_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_next routines are called when the thread completes
   processing of the iteration block currently assigned to it.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */
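
/* These routines interleave chunk assignment with the ordered-section
   bookkeeping from ordered.c: gomp_ordered_sync () synchronizes with the
   ordered queue before a new block is claimed under the work-share lock,
   and gomp_ordered_next ()/gomp_ordered_last () record whether another
   block follows.  See ordered.c for the details.  */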

static bool
gomp_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  test = gomp_iter_ull_static_next (istart, iend);
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return test == 0;
}

static bool
gomp_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ull_ordered_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ull_ordered_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_ordered_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_ordered_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.  */

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(gomp_loop_ull_static_start) GOMP_loop_ull_static_start
	__attribute__((alias ("gomp_loop_ull_static_start")));
extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_dynamic_start
	__attribute__((alias ("gomp_loop_ull_dynamic_start")));
extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start
	__attribute__((alias ("gomp_loop_ull_guided_start")));

extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start
	__attribute__((alias ("gomp_loop_ull_ordered_static_start")));
extern __typeof(gomp_loop_ull_ordered_dynamic_start) GOMP_loop_ull_ordered_dynamic_start
	__attribute__((alias ("gomp_loop_ull_ordered_dynamic_start")));
extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start
	__attribute__((alias ("gomp_loop_ull_ordered_guided_start")));

extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next
	__attribute__((alias ("gomp_loop_ull_static_next")));
extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next
	__attribute__((alias ("gomp_loop_ull_dynamic_next")));
extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next
	__attribute__((alias ("gomp_loop_ull_guided_next")));

extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next
	__attribute__((alias ("gomp_loop_ull_ordered_static_next")));
extern __typeof(gomp_loop_ull_ordered_dynamic_next) GOMP_loop_ull_ordered_dynamic_next
	__attribute__((alias ("gomp_loop_ull_ordered_dynamic_next")));
extern __typeof(gomp_loop_ull_ordered_guided_next) GOMP_loop_ull_ordered_guided_next
	__attribute__((alias ("gomp_loop_ull_ordered_guided_next")));
#else

bool
GOMP_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_start (up, start, end, incr, chunk_size, istart,
				     iend);
}

bool
GOMP_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, gomp_ull chunk_size,
			     gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
				      iend);
}

bool
GOMP_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
				     iend);
}

bool
GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_start (up, start, end, incr, chunk_size,
					     istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
				     gomp_ull incr, gomp_ull chunk_size,
				     gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, chunk_size,
					      istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_start (up, start, end, incr, chunk_size,
					     istart, iend);
}

bool
GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_next (istart, iend);
}

bool
GOMP_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_next (istart, iend);
}
#endif