1 /* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
4 This file is part of the GNU OpenMP Library (libgomp).
6 Libgomp is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with libgomp; see the file COPYING.LIB. If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 MA 02110-1301, USA. */
21 /* As a special exception, if you link this library with other files, some
22 of which are compiled with GCC, to produce an executable, this library
23 does not by itself cause the resulting executable to be covered by the
24 GNU General Public License. This exception does not however invalidate
25 any other reasons why the executable file might be covered by the GNU
26 General Public License. */
28 /* This file contains routines for managing work-share iteration, both
29 for loops and sections. */
35 /* This function implements the STATIC scheduling method. The caller should
36 iterate *pstart <= x < *pend. Return zero if there are more iterations
37 to perform; nonzero if not. Return less than 0 if this thread had
38 received the absolutely last iteration. */
/* Progress is carried between calls in thr->ts.static_trip: the code below
   tests it against -1 and 0, stores -1 or 1 into it, and increments it in
   the chunked path.
   NOTE(review): this listing is incomplete.  The embedded source numbering
   skips lines, and s, e, s0 and e0 are used where no declaration or
   assignment is visible, so the comments here describe only the statements
   that are actually shown.  */
41 gomp_iter_static_next (long *pstart
, long *pend
)
43 struct gomp_thread
*thr
= gomp_thread ();
44 struct gomp_team
*team
= thr
->ts
.team
;
45 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
/* A null team pointer is treated as a team of exactly one thread.  */
46 unsigned long nthreads
= team
? team
->nthreads
: 1;
/* The statement guarded by this test is not visible in this listing.
   NOTE(review): presumably an early exit once an earlier call stored -1
   in static_trip -- confirm against the full source.  */
48 if (thr
->ts
.static_trip
== -1)
51 /* Quick test for degenerate teams and orphaned constructs. */
/* Store -1 in static_trip, then return nonzero exactly when ws->next
   already equals ws->end.  */
56 thr
->ts
.static_trip
= -1;
57 return ws
->next
== ws
->end
;
60 /* We interpret chunk_size zero as "unspecified", which means that we
61 should break up the iterations such that each thread makes only one
62 trip through the outer loop. */
63 if (ws
->chunk_size
== 0)
65 unsigned long n
, q
, i
;
/* The statement guarded by this positive-static_trip test is not visible
   in this listing.  */
69 if (thr
->ts
.static_trip
> 0)
72 /* Compute the total number of iterations. */
/* s biases the numerator by incr - 1 (incr + 1 for a negative step) so
   that, for a range running in the direction of incr, the division below
   rounds up and a partial final step still counts as one iteration.  */
73 s
= ws
->incr
+ (ws
->incr
> 0 ? -1 : 1);
74 n
= (ws
->end
- ws
->next
+ s
) / ws
->incr
;
77 /* Compute the "zero-based" start and end points. That is, as
78 if the loop began at zero and incremented by one. */
/* Adds one to q whenever q * nthreads differs from n.  The initial
   assignment to q is not visible in this listing.  */
80 q
+= (q
* nthreads
!= n
);
86 /* Notice when no iterations allocated for this thread. */
/* The condition guarding this store is not visible in this listing.  */
89 thr
->ts
.static_trip
= 1;
93 /* Transform these to the actual start and end numbers. */
/* Each zero-based offset is scaled by the step and rebased on ws->next.  */
94 s
= (long)s0
* ws
->incr
+ ws
->next
;
95 e
= (long)e0
* ws
->incr
+ ws
->next
;
/* Record -1 when this thread's range ends at the final iteration
   (e0 == n), and 1 otherwise.  */
99 thr
->ts
.static_trip
= (e0
== n
? -1 : 1);
104 unsigned long n
, s0
, e0
, i
, c
;
107 /* Otherwise, each thread gets exactly chunk_size iterations
108 (if available) each time through the loop. */
/* Same rounded-up iteration count as in the chunk_size == 0 path.  */
110 s
= ws
->incr
+ (ws
->incr
> 0 ? -1 : 1);
111 n
= (ws
->end
- ws
->next
+ s
) / ws
->incr
;
115 /* Initial guess is a C sized chunk positioned nthreads iterations
116 in, offset by our thread number. */
/* The assignments to i and c are not visible in this listing.
   NOTE(review): presumably i is this thread's index within the team and
   c is ws->chunk_size -- confirm against the full source.  */
117 s0
= (thr
->ts
.static_trip
* nthreads
+ i
) * c
;
120 /* Detect overflow. */
126 /* Transform these to the actual start and end numbers. */
127 s
= (long)s0
* ws
->incr
+ ws
->next
;
128 e
= (long)e0
* ws
->incr
+ ws
->next
;
/* The conditions selecting between the next two updates are not visible
   in this listing.  One stores -1 in static_trip; the other advances the
   trip counter that feeds the s0 computation above.  */
134 thr
->ts
.static_trip
= -1;
136 thr
->ts
.static_trip
++;
142 /* This function implements the DYNAMIC scheduling method. Arguments are
143 as for gomp_iter_static_next. This function must be called with ws->lock held. */
/* One step of the DYNAMIC schedule, operating on the calling thread's
   current work share (thr->ts.work_share).  PSTART and PEND are pointers
   to long.
   NOTE(review): this listing is incomplete (the embedded source numbering
   skips lines).  Only the empty-range test and the chunk/left setup are
   visible; the stores through PSTART and PEND and the update of the work
   share are not shown.  */
147 gomp_iter_dynamic_next_locked (long *pstart
, long *pend
)
149 struct gomp_thread
*thr
= gomp_thread ();
150 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
151 long start
, end
, chunk
, left
;
/* The assignment to start is not visible in this listing.
   NOTE(review): presumably start is read from ws->next -- confirm.  The
   statement guarded by this start == ws->end test is not shown either.  */
154 if (start
== ws
->end
)
/* chunk is the work share's chunk_size; left is the signed distance from
   start to ws->end.  */
157 chunk
= ws
->chunk_size
;
158 left
= ws
->end
- start
;
178 #ifdef HAVE_SYNC_BUILTINS
179 /* Similar, but doesn't require the lock held, and uses compare-and-swap
180 instead. Note that the only memory value that changes is ws->next. */
/* NOTE(review): this listing is incomplete (the embedded source numbering
   skips lines).  The assignments to start, end and incr, any adjustment of
   chunk against left, and the handling of a failed compare-and-swap are
   not visible; the comments below cover only the statements shown.  */
183 gomp_iter_dynamic_next (long *pstart
, long *pend
)
185 struct gomp_thread
*thr
= gomp_thread ();
186 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
187 long start
, end
, nend
, chunk
, incr
;
191 chunk
= ws
->chunk_size
;
/* A nonzero ws->mode is hinted as the expected case.  On that route
   ws->next is advanced by chunk with an atomic fetch-and-add, and tmp
   receives the value ws->next held before the addition.  */
193 if (__builtin_expect (ws
->mode
, 1))
195 long tmp
= __sync_fetch_and_add (&ws
->next
, chunk
);
/* Signed distance from start to end.  */
223 long left
= end
- start
;
/* Propose start + chunk as the new ws->next and try to install it, but
   only if ws->next still holds start.  The builtin returns the value found
   in ws->next, so tmp == start means the exchange took place; that outcome
   is hinted as the expected one.  */
239 nend
= start
+ chunk
;
241 tmp
= __sync_val_compare_and_swap (&ws
->next
, start
, nend
);
242 if (__builtin_expect (tmp
== start
, 1))
252 #endif /* HAVE_SYNC_BUILTINS */
255 /* This function implements the GUIDED scheduling method. Arguments are
256 as for gomp_iter_static_next. This function must be called with the
257 work share lock held. */
/* NOTE(review): this listing is incomplete (the embedded source numbering
   skips lines).  The declarations of n, q, start and end, the assignment
   to start, and the statements guarded by the two tests below are not
   visible; the comments cover only the statements shown.  */
260 gomp_iter_guided_next_locked (long *pstart
, long *pend
)
262 struct gomp_thread
*thr
= gomp_thread ();
263 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
264 struct gomp_team
*team
= thr
->ts
.team
;
/* A null team pointer is treated as a team of exactly one thread.  */
265 unsigned long nthreads
= team
? team
->nthreads
: 1;
/* Tests whether ws->next has reached ws->end; the guarded statement is
   not visible in this listing.  */
269 if (ws
->next
== ws
->end
)
/* n is the distance from start to ws->end measured in steps of ws->incr;
   q is n divided by nthreads, rounded up.  */
273 n
= (ws
->end
- start
) / ws
->incr
;
274 q
= (n
+ nthreads
- 1) / nthreads
;
/* The statement guarded by this comparison is not visible in this
   listing.  NOTE(review): presumably it raises q to ws->chunk_size --
   confirm against the full source.  */
276 if (q
< ws
->chunk_size
)
/* Convert the step count q back into an iteration value past start.  */
279 end
= start
+ q
* ws
->incr
;
289 #ifdef HAVE_SYNC_BUILTINS
290 /* Similar, but doesn't require the lock held, and uses compare-and-swap
291 instead. Note that the only memory value that changes is ws->next. */
/* NOTE(review): this listing is incomplete (the embedded source numbering
   skips lines).  The declarations of n, q and tmp, the assignments to
   start, end and incr, any use of chunk_size after it is loaded, and the
   handling of a failed compare-and-swap are not visible; the comments
   below cover only the statements shown.  */
294 gomp_iter_guided_next (long *pstart
, long *pend
)
296 struct gomp_thread
*thr
= gomp_thread ();
297 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
298 struct gomp_team
*team
= thr
->ts
.team
;
/* A null team pointer is treated as a team of exactly one thread.  */
299 unsigned long nthreads
= team
? team
->nthreads
: 1;
300 long start
, end
, nend
, incr
;
301 unsigned long chunk_size
;
306 chunk_size
= ws
->chunk_size
;
/* n is the distance from start to end measured in steps of incr; q is n
   divided by nthreads, rounded up.  */
316 n
= (end
- start
) / incr
;
317 q
= (n
+ nthreads
- 1) / nthreads
;
/* In the expected case q does not exceed n, and the proposed new position
   is q steps past start.  The alternative for q > n is not visible in
   this listing.  */
321 if (__builtin_expect (q
<= n
, 1))
322 nend
= start
+ q
* incr
;
/* Try to install nend as ws->next, but only if ws->next still holds start.
   The builtin returns the value found in ws->next, so tmp == start means
   the exchange took place; that outcome is hinted as the expected one.  */
326 tmp
= __sync_val_compare_and_swap (&ws
->next
, start
, nend
);
327 if (__builtin_expect (tmp
== start
, 1))
337 #endif /* HAVE_SYNC_BUILTINS */