/* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file contains routines for managing work-share iteration, both
   for loops and sections.  */
32 /* This function implements the STATIC scheduling method. The caller should
33 iterate *pstart <= x < *pend. Return zero if there are more iterations
34 to perform; nonzero if not. Return less than 0 if this thread had
35 received the absolutely last iteration. */
38 gomp_iter_static_next (long *pstart
, long *pend
)
40 struct gomp_thread
*thr
= gomp_thread ();
41 struct gomp_team
*team
= thr
->ts
.team
;
42 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
43 unsigned long nthreads
= team
? team
->nthreads
: 1;
45 if (thr
->ts
.static_trip
== -1)
48 /* Quick test for degenerate teams and orphaned constructs. */
53 thr
->ts
.static_trip
= -1;
54 return ws
->next
== ws
->end
;
57 /* We interpret chunk_size zero as "unspecified", which means that we
58 should break up the iterations such that each thread makes only one
59 trip through the outer loop. */
60 if (ws
->chunk_size
== 0)
62 unsigned long n
, q
, i
;
66 if (thr
->ts
.static_trip
> 0)
69 /* Compute the total number of iterations. */
70 s
= ws
->incr
+ (ws
->incr
> 0 ? -1 : 1);
71 n
= (ws
->end
- ws
->next
+ s
) / ws
->incr
;
74 /* Compute the "zero-based" start and end points. That is, as
75 if the loop began at zero and incremented by one. */
77 q
+= (q
* nthreads
!= n
);
83 /* Notice when no iterations allocated for this thread. */
86 thr
->ts
.static_trip
= 1;
90 /* Transform these to the actual start and end numbers. */
91 s
= (long)s0
* ws
->incr
+ ws
->next
;
92 e
= (long)e0
* ws
->incr
+ ws
->next
;
96 thr
->ts
.static_trip
= (e0
== n
? -1 : 1);
101 unsigned long n
, s0
, e0
, i
, c
;
104 /* Otherwise, each thread gets exactly chunk_size iterations
105 (if available) each time through the loop. */
107 s
= ws
->incr
+ (ws
->incr
> 0 ? -1 : 1);
108 n
= (ws
->end
- ws
->next
+ s
) / ws
->incr
;
112 /* Initial guess is a C sized chunk positioned nthreads iterations
113 in, offset by our thread number. */
114 s0
= (thr
->ts
.static_trip
* nthreads
+ i
) * c
;
117 /* Detect overflow. */
123 /* Transform these to the actual start and end numbers. */
124 s
= (long)s0
* ws
->incr
+ ws
->next
;
125 e
= (long)e0
* ws
->incr
+ ws
->next
;
131 thr
->ts
.static_trip
= -1;
133 thr
->ts
.static_trip
++;
139 /* This function implements the DYNAMIC scheduling method. Arguments are
140 as for gomp_iter_static_next. This function must be called with ws->lock
144 gomp_iter_dynamic_next_locked (long *pstart
, long *pend
)
146 struct gomp_thread
*thr
= gomp_thread ();
147 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
148 long start
, end
, chunk
, left
;
151 if (start
== ws
->end
)
154 chunk
= ws
->chunk_size
;
155 left
= ws
->end
- start
;
175 #ifdef HAVE_SYNC_BUILTINS
176 /* Similar, but doesn't require the lock held, and uses compare-and-swap
177 instead. Note that the only memory value that changes is ws->next. */
180 gomp_iter_dynamic_next (long *pstart
, long *pend
)
182 struct gomp_thread
*thr
= gomp_thread ();
183 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
184 long start
, end
, nend
, chunk
, incr
;
188 chunk
= ws
->chunk_size
;
190 if (__builtin_expect (ws
->mode
, 1))
192 long tmp
= __sync_fetch_and_add (&ws
->next
, chunk
);
220 long left
= end
- start
;
236 nend
= start
+ chunk
;
238 tmp
= __sync_val_compare_and_swap (&ws
->next
, start
, nend
);
239 if (__builtin_expect (tmp
== start
, 1))
249 #endif /* HAVE_SYNC_BUILTINS */
252 /* This function implements the GUIDED scheduling method. Arguments are
253 as for gomp_iter_static_next. This function must be called with the
254 work share lock held. */
257 gomp_iter_guided_next_locked (long *pstart
, long *pend
)
259 struct gomp_thread
*thr
= gomp_thread ();
260 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
261 struct gomp_team
*team
= thr
->ts
.team
;
262 unsigned long nthreads
= team
? team
->nthreads
: 1;
266 if (ws
->next
== ws
->end
)
270 n
= (ws
->end
- start
) / ws
->incr
;
271 q
= (n
+ nthreads
- 1) / nthreads
;
273 if (q
< ws
->chunk_size
)
276 end
= start
+ q
* ws
->incr
;
286 #ifdef HAVE_SYNC_BUILTINS
287 /* Similar, but doesn't require the lock held, and uses compare-and-swap
288 instead. Note that the only memory value that changes is ws->next. */
291 gomp_iter_guided_next (long *pstart
, long *pend
)
293 struct gomp_thread
*thr
= gomp_thread ();
294 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
295 struct gomp_team
*team
= thr
->ts
.team
;
296 unsigned long nthreads
= team
? team
->nthreads
: 1;
297 long start
, end
, nend
, incr
;
298 unsigned long chunk_size
;
303 chunk_size
= ws
->chunk_size
;
313 n
= (end
- start
) / incr
;
314 q
= (n
+ nthreads
- 1) / nthreads
;
318 if (__builtin_expect (q
<= n
, 1))
319 nend
= start
+ q
* incr
;
323 tmp
= __sync_val_compare_and_swap (&ws
->next
, start
, nend
);
324 if (__builtin_expect (tmp
== start
, 1))
334 #endif /* HAVE_SYNC_BUILTINS */