1 /* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
4 This file is part of the GNU OpenMP Library (libgomp).
6 Libgomp is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option) any later version.
11 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>. */
25 /* This file contains routines for managing work-share iteration, both
26 for loops and sections. */
/* Shorthand for unsigned long long, the type in which the iteration
   bounds (*pstart, *pend) and the chunk and iteration counts of the
   routines in this file are expressed. */
31 typedef unsigned long long gomp_ull
;
33 /* This function implements the STATIC scheduling method. The caller should
34 iterate *pstart <= x < *pend. Return zero if there are more iterations
35 to perform; nonzero if not. Return less than 0 if this thread had
36 received the absolutely last iteration. */
39 gomp_iter_ull_static_next (gomp_ull
*pstart
, gomp_ull
*pend
)
41 struct gomp_thread
*thr
= gomp_thread ();
42 struct gomp_team
*team
= thr
->ts
.team
;
43 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
44 unsigned long nthreads
= team
? team
->nthreads
: 1;
46 if (thr
->ts
.static_trip
== -1)
49 /* Quick test for degenerate teams and orphaned constructs. */
52 *pstart
= ws
->next_ull
;
54 thr
->ts
.static_trip
= -1;
55 return ws
->next_ull
== ws
->end_ull
;
58 /* We interpret chunk_size zero as "unspecified", which means that we
59 should break up the iterations such that each thread makes only one
60 trip through the outer loop. */
61 if (ws
->chunk_size_ull
== 0)
63 gomp_ull n
, q
, i
, s0
, e0
, s
, e
;
65 if (thr
->ts
.static_trip
> 0)
68 /* Compute the total number of iterations. */
69 if (__builtin_expect (ws
->mode
, 0) == 0)
70 n
= (ws
->end_ull
- ws
->next_ull
+ ws
->incr_ull
- 1) / ws
->incr_ull
;
72 n
= (ws
->next_ull
- ws
->end_ull
- ws
->incr_ull
- 1) / -ws
->incr_ull
;
75 /* Compute the "zero-based" start and end points. That is, as
76 if the loop began at zero and incremented by one. */
78 q
+= (q
* nthreads
!= n
);
84 /* Notice when no iterations allocated for this thread. */
87 thr
->ts
.static_trip
= 1;
91 /* Transform these to the actual start and end numbers. */
92 s
= s0
* ws
->incr_ull
+ ws
->next_ull
;
93 e
= e0
* ws
->incr_ull
+ ws
->next_ull
;
97 thr
->ts
.static_trip
= (e0
== n
? -1 : 1);
102 gomp_ull n
, s0
, e0
, i
, c
, s
, e
;
104 /* Otherwise, each thread gets exactly chunk_size iterations
105 (if available) each time through the loop. */
107 if (__builtin_expect (ws
->mode
, 0) == 0)
108 n
= (ws
->end_ull
- ws
->next_ull
+ ws
->incr_ull
- 1) / ws
->incr_ull
;
110 n
= (ws
->next_ull
- ws
->end_ull
- ws
->incr_ull
- 1) / -ws
->incr_ull
;
112 c
= ws
->chunk_size_ull
;
114 /* Initial guess is a C sized chunk positioned nthreads iterations
115 in, offset by our thread number. */
116 s0
= (thr
->ts
.static_trip
* (gomp_ull
) nthreads
+ i
) * c
;
119 /* Detect overflow. */
125 /* Transform these to the actual start and end numbers. */
126 s
= s0
* ws
->incr_ull
+ ws
->next_ull
;
127 e
= e0
* ws
->incr_ull
+ ws
->next_ull
;
133 thr
->ts
.static_trip
= -1;
135 thr
->ts
.static_trip
++;
141 /* This function implements the DYNAMIC scheduling method. Arguments are
142 as for gomp_iter_ull_static_next. This function must be called with the work share lock held. */
146 gomp_iter_ull_dynamic_next_locked (gomp_ull
*pstart
, gomp_ull
*pend
)
148 struct gomp_thread
*thr
= gomp_thread ();
149 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
150 gomp_ull start
, end
, chunk
, left
;
152 start
= ws
->next_ull
;
153 if (start
== ws
->end_ull
)
156 chunk
= ws
->chunk_size_ull
;
157 left
= ws
->end_ull
- start
;
158 if (__builtin_expect (ws
->mode
& 2, 0))
177 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
178 /* Similar, but doesn't require the lock to be held, and uses compare-and-swap
179 instead. Note that the only memory value that changes is ws->next_ull. */
182 gomp_iter_ull_dynamic_next (gomp_ull
*pstart
, gomp_ull
*pend
)
184 struct gomp_thread
*thr
= gomp_thread ();
185 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
186 gomp_ull start
, end
, nend
, chunk
;
189 chunk
= ws
->chunk_size_ull
;
191 if (__builtin_expect (ws
->mode
& 1, 1))
193 gomp_ull tmp
= __sync_fetch_and_add (&ws
->next_ull
, chunk
);
194 if (__builtin_expect (ws
->mode
& 2, 0) == 0)
218 start
= ws
->next_ull
;
221 gomp_ull left
= end
- start
;
227 if (__builtin_expect (ws
->mode
& 2, 0))
237 nend
= start
+ chunk
;
239 tmp
= __sync_val_compare_and_swap (&ws
->next_ull
, start
, nend
);
240 if (__builtin_expect (tmp
== start
, 1))
250 #endif /* HAVE_SYNC_BUILTINS && __LP64__ */
253 /* This function implements the GUIDED scheduling method. Arguments are
254 as for gomp_iter_ull_static_next. This function must be called with the
255 work share lock held. */
258 gomp_iter_ull_guided_next_locked (gomp_ull
*pstart
, gomp_ull
*pend
)
260 struct gomp_thread
*thr
= gomp_thread ();
261 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
262 struct gomp_team
*team
= thr
->ts
.team
;
263 gomp_ull nthreads
= team
? team
->nthreads
: 1;
267 if (ws
->next_ull
== ws
->end_ull
)
270 start
= ws
->next_ull
;
271 if (__builtin_expect (ws
->mode
, 0) == 0)
272 n
= (ws
->end_ull
- start
) / ws
->incr_ull
;
274 n
= (start
- ws
->end_ull
) / -ws
->incr_ull
;
275 q
= (n
+ nthreads
- 1) / nthreads
;
277 if (q
< ws
->chunk_size_ull
)
278 q
= ws
->chunk_size_ull
;
280 end
= start
+ q
* ws
->incr_ull
;
290 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
291 /* Similar, but doesn't require the lock to be held, and uses compare-and-swap
292 instead. Note that the only memory value that changes is ws->next_ull. */
295 gomp_iter_ull_guided_next (gomp_ull
*pstart
, gomp_ull
*pend
)
297 struct gomp_thread
*thr
= gomp_thread ();
298 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
299 struct gomp_team
*team
= thr
->ts
.team
;
300 gomp_ull nthreads
= team
? team
->nthreads
: 1;
301 gomp_ull start
, end
, nend
, incr
;
304 start
= ws
->next_ull
;
307 chunk_size
= ws
->chunk_size_ull
;
317 if (__builtin_expect (ws
->mode
, 0) == 0)
318 n
= (end
- start
) / incr
;
320 n
= (start
- end
) / -incr
;
321 q
= (n
+ nthreads
- 1) / nthreads
;
325 if (__builtin_expect (q
<= n
, 1))
326 nend
= start
+ q
* incr
;
330 tmp
= __sync_val_compare_and_swap (&ws
->next_ull
, start
, nend
);
331 if (__builtin_expect (tmp
== start
, 1))
341 #endif /* HAVE_SYNC_BUILTINS && __LP64__ */