1 /* Copyright (C) 2005-2024 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains routines for managing work-share iteration, both
27 for loops and sections. */
32 typedef unsigned long long gomp_ull
;
34 /* This function implements the STATIC scheduling method. The caller should
35 iterate *pstart <= x < *pend. Return zero if there are more iterations
36 to perform; nonzero if not. Return less than 0 if this thread had
37 received the absolutely last iteration. */
40 gomp_iter_ull_static_next (gomp_ull
*pstart
, gomp_ull
*pend
)
42 struct gomp_thread
*thr
= gomp_thread ();
43 struct gomp_team
*team
= thr
->ts
.team
;
44 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
45 unsigned long nthreads
= team
? team
->nthreads
: 1;
47 if (thr
->ts
.static_trip
== -1)
50 /* Quick test for degenerate teams and orphaned constructs. */
53 *pstart
= ws
->next_ull
;
55 thr
->ts
.static_trip
= -1;
56 return ws
->next_ull
== ws
->end_ull
;
59 /* We interpret chunk_size zero as "unspecified", which means that we
60 should break up the iterations such that each thread makes only one
61 trip through the outer loop. */
62 if (ws
->chunk_size_ull
== 0)
64 gomp_ull n
, q
, i
, t
, s0
, e0
, s
, e
;
66 if (thr
->ts
.static_trip
> 0)
69 /* Compute the total number of iterations. */
70 if (__builtin_expect (ws
->mode
, 0) == 0)
71 n
= (ws
->end_ull
- ws
->next_ull
+ ws
->incr_ull
- 1) / ws
->incr_ull
;
73 n
= (ws
->next_ull
- ws
->end_ull
- ws
->incr_ull
- 1) / -ws
->incr_ull
;
76 /* Compute the "zero-based" start and end points. That is, as
77 if the loop began at zero and incremented by one. */
88 /* Notice when no iterations allocated for this thread. */
91 thr
->ts
.static_trip
= 1;
95 /* Transform these to the actual start and end numbers. */
96 s
= s0
* ws
->incr_ull
+ ws
->next_ull
;
97 e
= e0
* ws
->incr_ull
+ ws
->next_ull
;
101 thr
->ts
.static_trip
= (e0
== n
? -1 : 1);
106 gomp_ull n
, s0
, e0
, i
, c
, s
, e
;
108 /* Otherwise, each thread gets exactly chunk_size iterations
109 (if available) each time through the loop. */
111 if (__builtin_expect (ws
->mode
, 0) == 0)
112 n
= (ws
->end_ull
- ws
->next_ull
+ ws
->incr_ull
- 1) / ws
->incr_ull
;
114 n
= (ws
->next_ull
- ws
->end_ull
- ws
->incr_ull
- 1) / -ws
->incr_ull
;
116 c
= ws
->chunk_size_ull
;
118 /* Initial guess is a C sized chunk positioned nthreads iterations
119 in, offset by our thread number. */
120 s0
= (thr
->ts
.static_trip
* (gomp_ull
) nthreads
+ i
) * c
;
123 /* Detect overflow. */
129 /* Transform these to the actual start and end numbers. */
130 s
= s0
* ws
->incr_ull
+ ws
->next_ull
;
131 e
= e0
* ws
->incr_ull
+ ws
->next_ull
;
137 thr
->ts
.static_trip
= -1;
139 thr
->ts
.static_trip
++;
145 /* This function implements the DYNAMIC scheduling method. Arguments are
146 as for gomp_iter_ull_static_next. This function must be called with
150 gomp_iter_ull_dynamic_next_locked (gomp_ull
*pstart
, gomp_ull
*pend
)
152 struct gomp_thread
*thr
= gomp_thread ();
153 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
154 gomp_ull start
, end
, chunk
, left
;
156 start
= ws
->next_ull
;
157 if (start
== ws
->end_ull
)
160 chunk
= ws
->chunk_size_ull
;
161 left
= ws
->end_ull
- start
;
162 if (__builtin_expect (ws
->mode
& 2, 0))
181 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
182 /* Similar, but doesn't require the lock held, and uses compare-and-swap
183 instead. Note that the only memory value that changes is ws->next_ull. */
186 gomp_iter_ull_dynamic_next (gomp_ull
*pstart
, gomp_ull
*pend
)
188 struct gomp_thread
*thr
= gomp_thread ();
189 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
190 gomp_ull start
, end
, nend
, chunk
;
193 chunk
= ws
->chunk_size_ull
;
195 if (__builtin_expect (ws
->mode
& 1, 1))
197 gomp_ull tmp
= __sync_fetch_and_add (&ws
->next_ull
, chunk
);
198 if (__builtin_expect (ws
->mode
& 2, 0) == 0)
222 start
= __atomic_load_n (&ws
->next_ull
, MEMMODEL_RELAXED
);
225 gomp_ull left
= end
- start
;
231 if (__builtin_expect (ws
->mode
& 2, 0))
241 nend
= start
+ chunk
;
243 tmp
= __sync_val_compare_and_swap (&ws
->next_ull
, start
, nend
);
244 if (__builtin_expect (tmp
== start
, 1))
254 #endif /* HAVE_SYNC_BUILTINS */
257 /* This function implements the GUIDED scheduling method. Arguments are
258 as for gomp_iter_ull_static_next. This function must be called with the
259 work share lock held. */
262 gomp_iter_ull_guided_next_locked (gomp_ull
*pstart
, gomp_ull
*pend
)
264 struct gomp_thread
*thr
= gomp_thread ();
265 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
266 struct gomp_team
*team
= thr
->ts
.team
;
267 gomp_ull nthreads
= team
? team
->nthreads
: 1;
271 if (ws
->next_ull
== ws
->end_ull
)
274 start
= ws
->next_ull
;
275 if (__builtin_expect (ws
->mode
, 0) == 0)
276 n
= (ws
->end_ull
- start
) / ws
->incr_ull
;
278 n
= (start
- ws
->end_ull
) / -ws
->incr_ull
;
279 q
= (n
+ nthreads
- 1) / nthreads
;
281 if (q
< ws
->chunk_size_ull
)
282 q
= ws
->chunk_size_ull
;
284 end
= start
+ q
* ws
->incr_ull
;
294 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
295 /* Similar, but doesn't require the lock held, and uses compare-and-swap
296 instead. Note that the only memory value that changes is ws->next_ull. */
299 gomp_iter_ull_guided_next (gomp_ull
*pstart
, gomp_ull
*pend
)
301 struct gomp_thread
*thr
= gomp_thread ();
302 struct gomp_work_share
*ws
= thr
->ts
.work_share
;
303 struct gomp_team
*team
= thr
->ts
.team
;
304 gomp_ull nthreads
= team
? team
->nthreads
: 1;
305 gomp_ull start
, end
, nend
, incr
;
308 start
= __atomic_load_n (&ws
->next_ull
, MEMMODEL_RELAXED
);
311 chunk_size
= ws
->chunk_size_ull
;
321 if (__builtin_expect (ws
->mode
, 0) == 0)
322 n
= (end
- start
) / incr
;
324 n
= (start
- end
) / -incr
;
325 q
= (n
+ nthreads
- 1) / nthreads
;
329 if (__builtin_expect (q
<= n
, 1))
330 nend
= start
+ q
* incr
;
334 tmp
= __sync_val_compare_and_swap (&ws
->next_ull
, start
, nend
);
335 if (__builtin_expect (tmp
== start
, 1))
345 #endif /* HAVE_SYNC_BUILTINS */