Fix PR tree-optimization/40219
[official-gcc.git] / libgomp / iter_ull.c
blob1754e6333c2d5f4b0266bfeba840b119eba3481c
/* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file contains routines for managing work-share iteration, both
   for loops and sections.  */
28 #include "libgomp.h"
29 #include <stdlib.h>
31 typedef unsigned long long gomp_ull;
/* This function implements the STATIC scheduling method.  The caller should
   iterate *pstart <= x < *pend.  Return zero if there are more iterations
   to perform; nonzero if not.  Return less than 0 if this thread had
   received the absolutely last iteration.  */
int
gomp_iter_ull_static_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned long nthreads = team ? team->nthreads : 1;

  /* static_trip == -1 records that this thread already received the
     work share's final iteration; there is nothing left to hand out.  */
  if (thr->ts.static_trip == -1)
    return -1;

  /* Quick test for degenerate teams and orphaned constructs.  */
  if (nthreads == 1)
    {
      *pstart = ws->next_ull;
      *pend = ws->end_ull;
      thr->ts.static_trip = -1;
      /* Nonzero (no more work) only when the range is already empty.  */
      return ws->next_ull == ws->end_ull;
    }

  /* We interpret chunk_size zero as "unspecified", which means that we
     should break up the iterations such that each thread makes only one
     trip through the outer loop.  */
  if (ws->chunk_size_ull == 0)
    {
      gomp_ull n, q, i, s0, e0, s, e;

      /* Each thread gets exactly one block; a positive static_trip means
	 this thread already took its block on an earlier call.  */
      if (thr->ts.static_trip > 0)
	return 1;

      /* Compute the total number of iterations.  NOTE(review): the
	 expect hint wraps ws->mode itself, and its value is then compared
	 to zero — value-equivalent to testing ws->mode == 0.  A nonzero
	 mode appears to mean a downward-running loop whose negative step
	 is stored two's-complement in incr_ull — confirm against the
	 work-share setup code.  */
      if (__builtin_expect (ws->mode, 0) == 0)
	n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
      else
	n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
      i = thr->ts.team_id;

      /* Compute the "zero-based" start and end points.  That is, as
	 if the loop began at zero and incremented by one.  */
      q = n / nthreads;
      q += (q * nthreads != n);	/* Round the per-thread quota up.  */
      s0 = q * i;
      e0 = s0 + q;
      if (e0 > n)
	e0 = n;

      /* Notice when no iterations allocated for this thread.  */
      if (s0 >= e0)
	{
	  thr->ts.static_trip = 1;
	  return 1;
	}

      /* Transform these to the actual start and end numbers.  */
      s = s0 * ws->incr_ull + ws->next_ull;
      e = e0 * ws->incr_ull + ws->next_ull;

      *pstart = s;
      *pend = e;
      /* The thread covering the last iteration marks itself with -1 so
	 the caller can tell it received the absolutely last piece.  */
      thr->ts.static_trip = (e0 == n ? -1 : 1);
      return 0;
    }
  else
    {
      gomp_ull n, s0, e0, i, c, s, e;

      /* Otherwise, each thread gets exactly chunk_size iterations
	 (if available) each time through the loop.  */

      if (__builtin_expect (ws->mode, 0) == 0)
	n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
      else
	n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
      i = thr->ts.team_id;
      c = ws->chunk_size_ull;

      /* Initial guess is a C sized chunk positioned nthreads iterations
	 in, offset by our thread number.  static_trip counts how many
	 rounds this thread has completed.  */
      s0 = (thr->ts.static_trip * (gomp_ull) nthreads + i) * c;
      e0 = s0 + c;

      /* Detect overflow.  */
      if (s0 >= n)
	return 1;
      if (e0 > n)
	e0 = n;

      /* Transform these to the actual start and end numbers.  */
      s = s0 * ws->incr_ull + ws->next_ull;
      e = e0 * ws->incr_ull + ws->next_ull;

      *pstart = s;
      *pend = e;

      if (e0 == n)
	thr->ts.static_trip = -1;
      else
	thr->ts.static_trip++;
      return 0;
    }
}
/* This function implements the DYNAMIC scheduling method.  Arguments are
   as for gomp_iter_ull_static_next.  This function must be called with
   ws->lock held.  */
145 bool
146 gomp_iter_ull_dynamic_next_locked (gomp_ull *pstart, gomp_ull *pend)
148 struct gomp_thread *thr = gomp_thread ();
149 struct gomp_work_share *ws = thr->ts.work_share;
150 gomp_ull start, end, chunk, left;
152 start = ws->next_ull;
153 if (start == ws->end_ull)
154 return false;
156 chunk = ws->chunk_size_ull;
157 left = ws->end_ull - start;
158 if (__builtin_expect (ws->mode & 2, 0))
160 if (chunk < left)
161 chunk = left;
163 else
165 if (chunk > left)
166 chunk = left;
168 end = start + chunk;
170 ws->next_ull = end;
171 *pstart = start;
172 *pend = end;
173 return true;
177 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next_ull.  */
bool
gomp_iter_ull_dynamic_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  gomp_ull start, end, nend, chunk;

  /* end and chunk_size never change after work-share init; only
     next_ull is modified concurrently.  */
  end = ws->end_ull;
  chunk = ws->chunk_size_ull;

  /* Fast path: mode bit 0 set appears to guarantee that blindly
     advancing next_ull by whole chunks cannot wrap past the usable
     range, so a single fetch-and-add suffices — confirm against the
     mode computation in the work-share setup code.  */
  if (__builtin_expect (ws->mode & 1, 1))
    {
      gomp_ull tmp = __sync_fetch_and_add (&ws->next_ull, chunk);
      /* NOTE(review): the expect hint wraps (ws->mode & 2) and its value
	 is compared to zero — value-equivalent to testing the bit clear.
	 Bit 1 clear is the upward-iterating case.  */
      if (__builtin_expect (ws->mode & 2, 0) == 0)
	{
	  if (tmp >= end)
	    return false;
	  nend = tmp + chunk;
	  if (nend > end)
	    nend = end;
	  *pstart = tmp;
	  *pend = nend;
	  return true;
	}
      else
	{
	  /* Downward case: comparisons flip because the unsigned values
	     represent a descending sequence.  */
	  if (tmp <= end)
	    return false;
	  nend = tmp + chunk;
	  if (nend < end)
	    nend = end;
	  *pstart = tmp;
	  *pend = nend;
	  return true;
	}
    }

  /* Slow path: claim [start, nend) with compare-and-swap, clamping the
     chunk to what is left so next_ull never moves past end.  */
  start = ws->next_ull;
  while (1)
    {
      gomp_ull left = end - start;
      gomp_ull tmp;

      if (start == end)
	return false;

      if (__builtin_expect (ws->mode & 2, 0))
	{
	  if (chunk < left)
	    chunk = left;
	}
      else
	{
	  if (chunk > left)
	    chunk = left;
	}
      nend = start + chunk;

      /* On CAS failure another thread advanced the cursor; retry from
	 the value it published.  */
      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
250 #endif /* HAVE_SYNC_BUILTINS */
/* This function implements the GUIDED scheduling method.  Arguments are
   as for gomp_iter_ull_static_next.  This function must be called with the
   work share lock held.  */
257 bool
258 gomp_iter_ull_guided_next_locked (gomp_ull *pstart, gomp_ull *pend)
260 struct gomp_thread *thr = gomp_thread ();
261 struct gomp_work_share *ws = thr->ts.work_share;
262 struct gomp_team *team = thr->ts.team;
263 gomp_ull nthreads = team ? team->nthreads : 1;
264 gomp_ull n, q;
265 gomp_ull start, end;
267 if (ws->next_ull == ws->end_ull)
268 return false;
270 start = ws->next_ull;
271 if (__builtin_expect (ws->mode, 0) == 0)
272 n = (ws->end_ull - start) / ws->incr_ull;
273 else
274 n = (start - ws->end_ull) / -ws->incr_ull;
275 q = (n + nthreads - 1) / nthreads;
277 if (q < ws->chunk_size_ull)
278 q = ws->chunk_size_ull;
279 if (q <= n)
280 end = start + q * ws->incr_ull;
281 else
282 end = ws->end_ull;
284 ws->next_ull = end;
285 *pstart = start;
286 *pend = end;
287 return true;
290 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next_ull.  */
bool
gomp_iter_ull_guided_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  gomp_ull nthreads = team ? team->nthreads : 1;
  gomp_ull start, end, nend, incr;
  gomp_ull chunk_size;

  /* Snapshot the work-share fields; of these only next_ull changes
     concurrently, and stale reads of it are repaired by the CAS retry
     loop below.  */
  start = ws->next_ull;
  end = ws->end_ull;
  incr = ws->incr_ull;
  chunk_size = ws->chunk_size_ull;

  while (1)
    {
      gomp_ull n, q;
      gomp_ull tmp;

      if (start == end)
	return false;

      /* Iterations still to be handed out.  NOTE(review): the expect
	 hint wraps ws->mode itself, and its value is then compared to
	 zero — value-equivalent to testing ws->mode == 0.  Nonzero mode
	 appears to be the downward-loop case — confirm against the
	 work-share setup code.  */
      if (__builtin_expect (ws->mode, 0) == 0)
	n = (end - start) / incr;
      else
	n = (start - end) / -incr;

      /* Guided schedule: a 1/nthreads share of what is left, rounded
	 up, but never less than the requested chunk size.  */
      q = (n + nthreads - 1) / nthreads;
      if (q < chunk_size)
	q = chunk_size;
      if (__builtin_expect (q <= n, 1))
	nend = start + q * incr;
      else
	nend = end;

      /* Try to claim [start, nend); on failure retry from the cursor
	 value another thread published.  */
      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
341 #endif /* HAVE_SYNC_BUILTINS */