Try to avoid lea if possible.
[official-gcc.git] / libgomp / iter.c
blob9ec4dbd2252e32eca0bcccc86661d7c32ffcb7d4
1 /* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
4 This file is part of the GNU OpenMP Library (libgomp).
6 Libgomp is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 more details.
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>. */
25 /* This file contains routines for managing work-share iteration, both
26 for loops and sections. */
28 #include "libgomp.h"
29 #include <stdlib.h>
32 /* This function implements the STATIC scheduling method. The caller should
33 iterate *pstart <= x < *pend. Return zero if there are more iterations
34 to perform; nonzero if not. Return less than 0 if this thread had
35 received the absolutely last iteration. */
37 int
38 gomp_iter_static_next (long *pstart, long *pend)
40 struct gomp_thread *thr = gomp_thread ();
41 struct gomp_team *team = thr->ts.team;
42 struct gomp_work_share *ws = thr->ts.work_share;
43 unsigned long nthreads = team ? team->nthreads : 1;
45 if (thr->ts.static_trip == -1)
46 return -1;
48 /* Quick test for degenerate teams and orphaned constructs. */
49 if (nthreads == 1)
51 *pstart = ws->next;
52 *pend = ws->end;
53 thr->ts.static_trip = -1;
54 return ws->next == ws->end;
57 /* We interpret chunk_size zero as "unspecified", which means that we
58 should break up the iterations such that each thread makes only one
59 trip through the outer loop. */
60 if (ws->chunk_size == 0)
62 unsigned long n, q, i;
63 unsigned long s0, e0;
64 long s, e;
66 if (thr->ts.static_trip > 0)
67 return 1;
69 /* Compute the total number of iterations. */
70 s = ws->incr + (ws->incr > 0 ? -1 : 1);
71 n = (ws->end - ws->next + s) / ws->incr;
72 i = thr->ts.team_id;
74 /* Compute the "zero-based" start and end points. That is, as
75 if the loop began at zero and incremented by one. */
76 q = n / nthreads;
77 q += (q * nthreads != n);
78 s0 = q * i;
79 e0 = s0 + q;
80 if (e0 > n)
81 e0 = n;
83 /* Notice when no iterations allocated for this thread. */
84 if (s0 >= e0)
86 thr->ts.static_trip = 1;
87 return 1;
90 /* Transform these to the actual start and end numbers. */
91 s = (long)s0 * ws->incr + ws->next;
92 e = (long)e0 * ws->incr + ws->next;
94 *pstart = s;
95 *pend = e;
96 thr->ts.static_trip = (e0 == n ? -1 : 1);
97 return 0;
99 else
101 unsigned long n, s0, e0, i, c;
102 long s, e;
104 /* Otherwise, each thread gets exactly chunk_size iterations
105 (if available) each time through the loop. */
107 s = ws->incr + (ws->incr > 0 ? -1 : 1);
108 n = (ws->end - ws->next + s) / ws->incr;
109 i = thr->ts.team_id;
110 c = ws->chunk_size;
112 /* Initial guess is a C sized chunk positioned nthreads iterations
113 in, offset by our thread number. */
114 s0 = (thr->ts.static_trip * nthreads + i) * c;
115 e0 = s0 + c;
117 /* Detect overflow. */
118 if (s0 >= n)
119 return 1;
120 if (e0 > n)
121 e0 = n;
123 /* Transform these to the actual start and end numbers. */
124 s = (long)s0 * ws->incr + ws->next;
125 e = (long)e0 * ws->incr + ws->next;
127 *pstart = s;
128 *pend = e;
130 if (e0 == n)
131 thr->ts.static_trip = -1;
132 else
133 thr->ts.static_trip++;
134 return 0;
139 /* This function implements the DYNAMIC scheduling method. Arguments are
140 as for gomp_iter_static_next. This function must be called with ws->lock
141 held. */
143 bool
144 gomp_iter_dynamic_next_locked (long *pstart, long *pend)
146 struct gomp_thread *thr = gomp_thread ();
147 struct gomp_work_share *ws = thr->ts.work_share;
148 long start, end, chunk, left;
150 start = ws->next;
151 if (start == ws->end)
152 return false;
154 chunk = ws->chunk_size;
155 left = ws->end - start;
156 if (ws->incr < 0)
158 if (chunk < left)
159 chunk = left;
161 else
163 if (chunk > left)
164 chunk = left;
166 end = start + chunk;
168 ws->next = end;
169 *pstart = start;
170 *pend = end;
171 return true;
175 #ifdef HAVE_SYNC_BUILTINS
176 /* Similar, but doesn't require the lock held, and uses compare-and-swap
177 instead. Note that the only memory value that changes is ws->next. */
179 bool
180 gomp_iter_dynamic_next (long *pstart, long *pend)
182 struct gomp_thread *thr = gomp_thread ();
183 struct gomp_work_share *ws = thr->ts.work_share;
184 long start, end, nend, chunk, incr;
186 end = ws->end;
187 incr = ws->incr;
188 chunk = ws->chunk_size;
190 if (__builtin_expect (ws->mode, 1))
192 long tmp = __sync_fetch_and_add (&ws->next, chunk);
193 if (incr > 0)
195 if (tmp >= end)
196 return false;
197 nend = tmp + chunk;
198 if (nend > end)
199 nend = end;
200 *pstart = tmp;
201 *pend = nend;
202 return true;
204 else
206 if (tmp <= end)
207 return false;
208 nend = tmp + chunk;
209 if (nend < end)
210 nend = end;
211 *pstart = tmp;
212 *pend = nend;
213 return true;
217 start = ws->next;
218 while (1)
220 long left = end - start;
221 long tmp;
223 if (start == end)
224 return false;
226 if (incr < 0)
228 if (chunk < left)
229 chunk = left;
231 else
233 if (chunk > left)
234 chunk = left;
236 nend = start + chunk;
238 tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
239 if (__builtin_expect (tmp == start, 1))
240 break;
242 start = tmp;
245 *pstart = start;
246 *pend = nend;
247 return true;
249 #endif /* HAVE_SYNC_BUILTINS */
252 /* This function implements the GUIDED scheduling method. Arguments are
253 as for gomp_iter_static_next. This function must be called with the
254 work share lock held. */
256 bool
257 gomp_iter_guided_next_locked (long *pstart, long *pend)
259 struct gomp_thread *thr = gomp_thread ();
260 struct gomp_work_share *ws = thr->ts.work_share;
261 struct gomp_team *team = thr->ts.team;
262 unsigned long nthreads = team ? team->nthreads : 1;
263 unsigned long n, q;
264 long start, end;
266 if (ws->next == ws->end)
267 return false;
269 start = ws->next;
270 n = (ws->end - start) / ws->incr;
271 q = (n + nthreads - 1) / nthreads;
273 if (q < ws->chunk_size)
274 q = ws->chunk_size;
275 if (q <= n)
276 end = start + q * ws->incr;
277 else
278 end = ws->end;
280 ws->next = end;
281 *pstart = start;
282 *pend = end;
283 return true;
286 #ifdef HAVE_SYNC_BUILTINS
287 /* Similar, but doesn't require the lock held, and uses compare-and-swap
288 instead. Note that the only memory value that changes is ws->next. */
290 bool
291 gomp_iter_guided_next (long *pstart, long *pend)
293 struct gomp_thread *thr = gomp_thread ();
294 struct gomp_work_share *ws = thr->ts.work_share;
295 struct gomp_team *team = thr->ts.team;
296 unsigned long nthreads = team ? team->nthreads : 1;
297 long start, end, nend, incr;
298 unsigned long chunk_size;
300 start = ws->next;
301 end = ws->end;
302 incr = ws->incr;
303 chunk_size = ws->chunk_size;
305 while (1)
307 unsigned long n, q;
308 long tmp;
310 if (start == end)
311 return false;
313 n = (end - start) / incr;
314 q = (n + nthreads - 1) / nthreads;
316 if (q < chunk_size)
317 q = chunk_size;
318 if (__builtin_expect (q <= n, 1))
319 nend = start + q * incr;
320 else
321 nend = end;
323 tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
324 if (__builtin_expect (tmp == start, 1))
325 break;
327 start = tmp;
330 *pstart = start;
331 *pend = nend;
332 return true;
334 #endif /* HAVE_SYNC_BUILTINS */