1 /* Copyright (C) 2005-2016 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library (libgomp).
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option) any later version.
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains routines for managing work-share iteration, both
27 for loops and sections. */
33 /* This function implements the STATIC scheduling method. The caller should
34 iterate *pstart <= x < *pend. Return zero if there are more iterations
35 to perform; nonzero if not. Return less than 0 if this thread had
36 received the absolutely last iteration. */
39 gomp_iter_static_next (long *pstart
, long *pend
)
/* NOTE(review): this listing of the function has gaps (return type, braces
   and several statements/conditions are not visible).  The comments added
   below describe only the statements that can be seen, and flag guesses.  */
/* Per-call context: THR is the calling thread's descriptor, TEAM and WS
   are taken from its team state (thr->ts).  */
41 struct gomp_thread
*thr
= gomp_thread ();
42 struct gomp_team
*team
= thr
->ts
.team
;
43 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
/* With no team the thread count defaults to 1.  */
44 unsigned long nthreads
= team
? team
->nthreads
: 1;
/* static_trip == -1 is checked before anything else; the guarded statement
   is not visible here.  Elsewhere in this function -1 is stored when the
   thread's range ends at the last iteration (e0 == n), so presumably this
   is the "already received the last iteration" early exit -- confirm.  */
46 if (thr
->ts
.static_trip
== -1)
49 /* Quick test for degenerate teams and orphaned constructs. */
/* Visible tail of the degenerate path: record -1 in static_trip and return
   nonzero exactly when the range is empty (ws->next == ws->end).  The
   condition selecting this path and any stores through PSTART/PEND are
   not visible in this listing.  */
54 thr
->ts
.static_trip
= -1;
55 return ws
->next
== ws
->end
;
58 /* We interpret chunk_size zero as "unspecified", which means that we
59 should break up the iterations such that each thread makes only one
60 trip through the outer loop. */
if (ws
->chunk_size
== 0)
63 unsigned long n
, q
, i
, t
;
/* static_trip > 0 guards a statement that is not visible.  This branch
   leaves static_trip at 1 or -1 once a range has been computed, so the test
   presumably detects a repeat call after the single trip -- confirm.  */
67 if (thr
->ts
.static_trip
> 0)
70 /* Compute the total number of iterations. */
/* S is a rounding bias (incr - 1 for positive incr, incr + 1 for negative)
   so that the division below counts a trailing partial step as one
   iteration.  */
71 s
= ws
->incr
+ (ws
->incr
> 0 ? -1 : 1);
72 n
= (ws
->end
- ws
->next
+ s
) / ws
->incr
;
75 /* Compute the "zero-based" start and end points. That is, as
76 if the loop began at zero and incremented by one. */
87 /* Notice when no iterations allocated for this thread. */
/* The test for the "no iterations" case is not visible; only the store of
   1 into static_trip that belongs to it is.  */
90 thr
->ts
.static_trip
= 1;
94 /* Transform these to the actual start and end numbers. */
/* Zero-based index K maps to the real value K * incr + ws->next.  */
95 s
= (long)s0
* ws
->incr
+ ws
->next
;
96 e
= (long)e0
* ws
->incr
+ ws
->next
;
/* e0 == n means this thread's range reaches the final iteration: store -1
   in that case, otherwise 1.  */
100 thr
->ts
.static_trip
= (e0
== n
? -1 : 1);
/* Locals of the chunk_size != 0 path.  The assignments to I and C are not
   visible; presumably I is the thread's index in the team and C is
   ws->chunk_size -- confirm.  */
105 unsigned long n
, s0
, e0
, i
, c
;
108 /* Otherwise, each thread gets exactly chunk_size iterations
109 (if available) each time through the loop. */
/* Same rounded-up iteration count as in the chunk_size == 0 path.  */
111 s
= ws
->incr
+ (ws
->incr
> 0 ? -1 : 1);
112 n
= (ws
->end
- ws
->next
+ s
) / ws
->incr
;
116 /* Initial guess is a C sized chunk positioned nthreads iterations
117 in, offset by our thread number. */
/* Here static_trip acts as a trip counter: trip number times nthreads plus
   I selects the chunk, and multiplying by C gives its zero-based start.  */
118 s0
= (thr
->ts
.static_trip
* nthreads
+ i
) * c
;
121 /* Detect overflow. */
127 /* Transform these to the actual start and end numbers. */
128 s
= (long)s0
* ws
->incr
+ ws
->next
;
129 e
= (long)e0
* ws
->incr
+ ws
->next
;
/* Two alternative updates of static_trip follow: -1, or advance the trip
   counter by one.  The condition choosing between them is not visible; in
   the chunk_size == 0 path the equivalent choice is made on e0 == n.  */
135 thr
->ts
.static_trip
= -1;
137 thr
->ts
.static_trip
++;
143 /* This function implements the DYNAMIC scheduling method. Arguments are
144 as for gomp_iter_static_next. This function must be called with ws->lock held. */
148 gomp_iter_dynamic_next_locked (long *pstart
, long *pend
)
/* NOTE(review): only fragments of this body are visible (no return type,
   braces, assignment to START, or stores through PSTART/PEND).  Comments
   describe the visible statements only.  */
/* WS is the calling thread's current work share.  */
150 struct gomp_thread
*thr
= gomp_thread ();
151 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
152 long start
, end
, chunk
, left
;
/* START equal to ws->end is tested first; the guarded statement is not
   visible.  Presumably START was loaded from ws->next and this is the
   "no iterations left" exit -- confirm.  */
155 if (start
== ws
->end
)
/* CHUNK is the work share's chunk_size; LEFT is the signed distance from
   START to ws->end.  How the two are compared or clamped is not visible.  */
158 chunk
= ws
->chunk_size
;
159 left
= ws
->end
- start
;
179 #ifdef HAVE_SYNC_BUILTINS
180 /* Similar, but doesn't require the lock held, and uses compare-and-swap
181 instead. Note that the only memory value that changes is ws->next. */
184 gomp_iter_dynamic_next (long *pstart
, long *pend
)
/* NOTE(review): only fragments of this body are visible (no return type,
   braces, loop structure, or stores through PSTART/PEND).  Comments
   describe the visible statements only.  */
/* WS is the calling thread's current work share.  */
186 struct gomp_thread
*thr
= gomp_thread ();
187 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
188 long start
, end
, nend
, chunk
, incr
;
192 chunk
= ws
->chunk_size
;
/* ws->mode nonzero is the expected case.  On that path a single atomic
   fetch-and-add bumps ws->next by CHUNK; TMP receives the value ws->next
   held before the add.  What is then done with TMP is not visible.  */
194 if (__builtin_expect (ws
->mode
, 1))
196 long tmp
= __sync_fetch_and_add (&ws
->next
, chunk
);
/* Other path: read ws->next with a relaxed atomic load.  */
221 start
= __atomic_load_n (&ws
->next
, MEMMODEL_RELAXED
);
/* LEFT is the signed distance from START to END.  The assignment to END
   and any adjustment of CHUNK against LEFT are not visible.  */
224 long left
= end
- start
;
/* Candidate new value for ws->next after taking CHUNK from START.  */
240 nend
= start
+ chunk
;
/* Try to publish NEND.  The builtin returns the previous contents of
   ws->next, so TMP == START means the swap took effect (expected case).
   Handling of a failed swap is not visible; presumably it retries with
   the observed value -- confirm.  */
242 tmp
= __sync_val_compare_and_swap (&ws
->next
, start
, nend
);
243 if (__builtin_expect (tmp
== start
, 1))
253 #endif /* HAVE_SYNC_BUILTINS */
256 /* This function implements the GUIDED scheduling method. Arguments are
257 as for gomp_iter_static_next. This function must be called with the
258 work share lock held. */
261 gomp_iter_guided_next_locked (long *pstart
, long *pend
)
/* NOTE(review): only fragments of this body are visible (no return type,
   braces, declarations of N/Q/START/END, or the update of ws->next).
   Comments describe the visible statements only.  */
/* WS and TEAM come from the calling thread's team state; with no team the
   thread count defaults to 1.  */
263 struct gomp_thread
*thr
= gomp_thread ();
264 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
265 struct gomp_team
*team
= thr
->ts
.team
;
266 unsigned long nthreads
= team
? team
->nthreads
: 1;
/* ws->next equal to ws->end is tested first; the guarded statement is not
   visible (presumably the "no iterations left" exit -- confirm).  */
270 if (ws
->next
== ws
->end
)
/* N is the number of incr-sized steps from START to ws->end.  The
   assignment to START is not visible.  */
274 n
= (ws
->end
- start
) / ws
->incr
;
/* Q is N divided by the thread count, rounded up.  */
275 q
= (n
+ nthreads
- 1) / nthreads
;
/* Q below chunk_size is special-cased; the guarded statement is not
   visible (presumably Q is raised to chunk_size -- confirm).  */
277 if (q
< ws
->chunk_size
)
/* END is START advanced by Q iterations.  */
280 end
= start
+ q
* ws
->incr
;
290 #ifdef HAVE_SYNC_BUILTINS
291 /* Similar, but doesn't require the lock held, and uses compare-and-swap
292 instead. Note that the only memory value that changes is ws->next. */
295 gomp_iter_guided_next (long *pstart
, long *pend
)
/* NOTE(review): only fragments of this body are visible (no return type,
   braces, loop structure, declarations of N/Q/TMP, or stores through
   PSTART/PEND).  Comments describe the visible statements only.  */
/* WS and TEAM come from the calling thread's team state; with no team the
   thread count defaults to 1.  */
297 struct gomp_thread
*thr
= gomp_thread ();
298 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
299 struct gomp_team
*team
= thr
->ts
.team
;
300 unsigned long nthreads
= team
? team
->nthreads
: 1;
301 long start
, end
, nend
, incr
;
302 unsigned long chunk_size
;
/* Read ws->next with a relaxed atomic load.  */
304 start
= __atomic_load_n (&ws
->next
, MEMMODEL_RELAXED
);
/* chunk_size is copied into a local; its use is not visible in this
   listing.  */
307 chunk_size
= ws
->chunk_size
;
/* N is the number of INCR-sized steps from START to END; the assignments
   to END and INCR are not visible.  */
317 n
= (end
- start
) / incr
;
/* Q is N divided by the thread count, rounded up.  */
318 q
= (n
+ nthreads
- 1) / nthreads
;
/* Expected case Q <= N: the candidate next value is START advanced by Q
   iterations.  The alternative branch is not visible.  */
322 if (__builtin_expect (q
<= n
, 1))
323 nend
= start
+ q
* incr
;
/* Try to publish NEND.  The builtin returns the previous contents of
   ws->next, so TMP == START means the swap took effect (expected case).
   Handling of a failed swap is not visible; presumably it retries with
   the observed value -- confirm.  */
327 tmp
= __sync_val_compare_and_swap (&ws
->next
, start
, nend
);
328 if (__builtin_expect (tmp
== start
, 1))
338 #endif /* HAVE_SYNC_BUILTINS */