1 /* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
4 This file is part of the GNU OpenMP Library (libgomp).
6 Libgomp is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
more details.

You should have received a copy of the GNU Lesser General Public License
17 along with libgomp; see the file COPYING.LIB. If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 MA 02110-1301, USA. */
21 /* As a special exception, if you link this library with other files, some
22 of which are compiled with GCC, to produce an executable, this library
23 does not by itself cause the resulting executable to be covered by the
24 GNU General Public License. This exception does not however invalidate
25 any other reasons why the executable file might be covered by the GNU
26 General Public License. */
28 /* This file contains routines for managing work-share iteration, both
29 for loops and sections. */
/* Iteration-count type used by all the unsigned-long-long loop variants.  */
typedef unsigned long long gomp_ull;
36 /* This function implements the STATIC scheduling method. The caller should
37 iterate *pstart <= x < *pend. Return zero if there are more iterations
38 to perform; nonzero if not. Return less than 0 if this thread had
39 received the absolutely last iteration. */
42 gomp_iter_ull_static_next (gomp_ull
*pstart
, gomp_ull
*pend
)
44 struct gomp_thread
*thr
= gomp_thread ();
45 struct gomp_team
*team
= thr
->ts
.team
;
46 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
47 unsigned long nthreads
= team
? team
->nthreads
: 1;
49 if (thr
->ts
.static_trip
== -1)
52 /* Quick test for degenerate teams and orphaned constructs. */
55 *pstart
= ws
->next_ull
;
57 thr
->ts
.static_trip
= -1;
58 return ws
->next_ull
== ws
->end_ull
;
61 /* We interpret chunk_size zero as "unspecified", which means that we
62 should break up the iterations such that each thread makes only one
63 trip through the outer loop. */
64 if (ws
->chunk_size_ull
== 0)
66 gomp_ull n
, q
, i
, s0
, e0
, s
, e
;
68 if (thr
->ts
.static_trip
> 0)
71 /* Compute the total number of iterations. */
72 if (__builtin_expect (ws
->mode
, 0) == 0)
73 n
= (ws
->end_ull
- ws
->next_ull
+ ws
->incr_ull
- 1) / ws
->incr_ull
;
75 n
= (ws
->next_ull
- ws
->end_ull
- ws
->incr_ull
- 1) / -ws
->incr_ull
;
78 /* Compute the "zero-based" start and end points. That is, as
79 if the loop began at zero and incremented by one. */
81 q
+= (q
* nthreads
!= n
);
87 /* Notice when no iterations allocated for this thread. */
90 thr
->ts
.static_trip
= 1;
94 /* Transform these to the actual start and end numbers. */
95 s
= s0
* ws
->incr_ull
+ ws
->next_ull
;
96 e
= e0
* ws
->incr_ull
+ ws
->next_ull
;
100 thr
->ts
.static_trip
= (e0
== n
? -1 : 1);
105 gomp_ull n
, s0
, e0
, i
, c
, s
, e
;
107 /* Otherwise, each thread gets exactly chunk_size iterations
108 (if available) each time through the loop. */
110 if (__builtin_expect (ws
->mode
, 0) == 0)
111 n
= (ws
->end_ull
- ws
->next_ull
+ ws
->incr_ull
- 1) / ws
->incr_ull
;
113 n
= (ws
->next_ull
- ws
->end_ull
- ws
->incr_ull
- 1) / -ws
->incr_ull
;
115 c
= ws
->chunk_size_ull
;
117 /* Initial guess is a C sized chunk positioned nthreads iterations
118 in, offset by our thread number. */
119 s0
= (thr
->ts
.static_trip
* (gomp_ull
) nthreads
+ i
) * c
;
122 /* Detect overflow. */
128 /* Transform these to the actual start and end numbers. */
129 s
= s0
* ws
->incr_ull
+ ws
->next_ull
;
130 e
= e0
* ws
->incr_ull
+ ws
->next_ull
;
136 thr
->ts
.static_trip
= -1;
138 thr
->ts
.static_trip
++;
/* This function implements the DYNAMIC scheduling method.  Arguments are
   as for gomp_iter_ull_static_next.  This function must be called with the
   work share lock held.  */
149 gomp_iter_ull_dynamic_next_locked (gomp_ull
*pstart
, gomp_ull
*pend
)
151 struct gomp_thread
*thr
= gomp_thread ();
152 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
153 gomp_ull start
, end
, chunk
, left
;
155 start
= ws
->next_ull
;
156 if (start
== ws
->end_ull
)
159 chunk
= ws
->chunk_size_ull
;
160 left
= ws
->end_ull
- start
;
161 if (__builtin_expect (ws
->mode
& 2, 0))
180 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
181 /* Similar, but doesn't require the lock held, and uses compare-and-swap
182 instead. Note that the only memory value that changes is ws->next_ull. */
185 gomp_iter_ull_dynamic_next (gomp_ull
*pstart
, gomp_ull
*pend
)
187 struct gomp_thread
*thr
= gomp_thread ();
188 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
189 gomp_ull start
, end
, nend
, chunk
;
192 chunk
= ws
->chunk_size_ull
;
194 if (__builtin_expect (ws
->mode
& 1, 1))
196 gomp_ull tmp
= __sync_fetch_and_add (&ws
->next_ull
, chunk
);
197 if (__builtin_expect (ws
->mode
& 2, 0) == 0)
221 start
= ws
->next_ull
;
224 gomp_ull left
= end
- start
;
230 if (__builtin_expect (ws
->mode
& 2, 0))
240 nend
= start
+ chunk
;
242 tmp
= __sync_val_compare_and_swap (&ws
->next_ull
, start
, nend
);
243 if (__builtin_expect (tmp
== start
, 1))
253 #endif /* HAVE_SYNC_BUILTINS */
256 /* This function implements the GUIDED scheduling method. Arguments are
257 as for gomp_iter_ull_static_next. This function must be called with the
258 work share lock held. */
261 gomp_iter_ull_guided_next_locked (gomp_ull
*pstart
, gomp_ull
*pend
)
263 struct gomp_thread
*thr
= gomp_thread ();
264 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
265 struct gomp_team
*team
= thr
->ts
.team
;
266 gomp_ull nthreads
= team
? team
->nthreads
: 1;
270 if (ws
->next_ull
== ws
->end_ull
)
273 start
= ws
->next_ull
;
274 if (__builtin_expect (ws
->mode
, 0) == 0)
275 n
= (ws
->end_ull
- start
) / ws
->incr_ull
;
277 n
= (start
- ws
->end_ull
) / -ws
->incr_ull
;
278 q
= (n
+ nthreads
- 1) / nthreads
;
280 if (q
< ws
->chunk_size_ull
)
281 q
= ws
->chunk_size_ull
;
283 end
= start
+ q
* ws
->incr_ull
;
293 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
294 /* Similar, but doesn't require the lock held, and uses compare-and-swap
295 instead. Note that the only memory value that changes is ws->next_ull. */
298 gomp_iter_ull_guided_next (gomp_ull
*pstart
, gomp_ull
*pend
)
300 struct gomp_thread
*thr
= gomp_thread ();
301 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
302 struct gomp_team
*team
= thr
->ts
.team
;
303 gomp_ull nthreads
= team
? team
->nthreads
: 1;
304 gomp_ull start
, end
, nend
, incr
;
307 start
= ws
->next_ull
;
310 chunk_size
= ws
->chunk_size_ull
;
320 if (__builtin_expect (ws
->mode
, 0) == 0)
321 n
= (end
- start
) / incr
;
323 n
= (start
- end
) / -incr
;
324 q
= (n
+ nthreads
- 1) / nthreads
;
328 if (__builtin_expect (q
<= n
, 1))
329 nend
= start
+ q
* incr
;
333 tmp
= __sync_val_compare_and_swap (&ws
->next_ull
, start
, nend
);
334 if (__builtin_expect (tmp
== start
, 1))
344 #endif /* HAVE_SYNC_BUILTINS */