libgomp/iter.c

   1 /* Copyright (C) 2005 Free Software Foundation, Inc.
   2    Contributed by Richard Henderson <rth@redhat.com>.
   3
   4    This file is part of the GNU OpenMP Library (libgomp).
   5
   6    Libgomp is free software; you can redistribute it and/or modify it
   7    under the terms of the GNU Lesser General Public License as published by
   8    the Free Software Foundation; either version 2.1 of the License, or
   9    (at your option) any later version.
  10
  11    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
  12    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  13    FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
  14    more details.
  15
  16    You should have received a copy of the GNU Lesser General Public License
  17    along with libgomp; see the file COPYING.LIB.  If not, write to the
  18    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  19    MA 02110-1301, USA.  */
  20
  21 /* As a special exception, if you link this library with other files, some
  22    of which are compiled with GCC, to produce an executable, this library
  23    does not by itself cause the resulting executable to be covered by the
  24    GNU General Public License.  This exception does not however invalidate
  25    any other reasons why the executable file might be covered by the GNU
  26    General Public License.  */
  27
  28 /* This file contains routines for managing work-share iteration, both
  29    for loops and sections.  */
  30
  31 #include "libgomp.h"
  32 #include <stdlib.h>
  33
  34
  35 /* This function implements the STATIC scheduling method.  The caller should
  36    iterate *pstart <= x < *pend.  Return zero if there are more iterations
  37    to perform; nonzero if not.  Return less than 0 if this thread had
  38    received the absolutely last iteration.  */
  39
  40 int
  41 gomp_iter_static_next (long *pstart, long *pend)
  42 {
  43   struct gomp_thread *thr = gomp_thread ();
  44   struct gomp_team *team = thr->ts.team;
  45   struct gomp_work_share *ws = thr->ts.work_share;
  46   unsigned long nthreads = team ? team->nthreads : 1;
  47
  48   if (thr->ts.static_trip == -1)
  49     return -1;
  50
  51   /* Quick test for degenerate teams and orphaned constructs.  */
  52   if (nthreads == 1)
  53     {
  54       *pstart = ws->next;
  55       *pend = ws->end;
  56       thr->ts.static_trip = -1;
  57       return ws->next == ws->end;
  58     }
  59
  60   /* We interpret chunk_size zero as "unspecified", which means that we
  61      should break up the iterations such that each thread makes only one
  62      trip through the outer loop.  */
  63   if (ws->chunk_size == 0)
  64     {
  65       unsigned long n, q, i;
  66       unsigned long s0, e0;
  67       long s, e;
  68
  69       if (thr->ts.static_trip > 0)
  70         return 1;
  71
  72       /* Compute the total number of iterations.  */
  73       s = ws->incr + (ws->incr > 0 ? -1 : 1);
  74       n = (ws->end - ws->next + s) / ws->incr;
  75       i = thr->ts.team_id;
  76
  77       /* Compute the "zero-based" start and end points.  That is, as
  78          if the loop began at zero and incremented by one.  */
  79       q = n / nthreads;
  80       q += (q * nthreads != n);
  81       s0 = q * i;
  82       e0 = s0 + q;
  83       if (e0 > n)
  84         e0 = n;
  85
  86       /* Notice when no iterations allocated for this thread.  */
  87       if (s0 >= e0)
  88         {
  89           thr->ts.static_trip = 1;
  90           return 1;
  91         }
  92
  93       /* Transform these to the actual start and end numbers.  */
  94       s = (long)s0 * ws->incr + ws->next;
  95       e = (long)e0 * ws->incr + ws->next;
  96
  97       *pstart = s;
  98       *pend = e;
  99       thr->ts.static_trip = (e0 == n ? -1 : 1);
 100       return 0;
 101     }
 102   else
 103     {
 104       unsigned long n, s0, e0, i, c;
 105       long s, e;
 106
 107       /* Otherwise, each thread gets exactly chunk_size iterations
 108          (if available) each time through the loop.  */
 109
 110       s = ws->incr + (ws->incr > 0 ? -1 : 1);
 111       n = (ws->end - ws->next + s) / ws->incr;
 112       i = thr->ts.team_id;
 113       c = ws->chunk_size;
 114
 115       /* Initial guess is a C sized chunk positioned nthreads iterations
 116          in, offset by our thread number.  */
 117       s0 = (thr->ts.static_trip * nthreads + i) * c;
 118       e0 = s0 + c;
 119
 120       /* Detect overflow.  */
 121       if (s0 >= n)
 122         return 1;
 123       if (e0 > n)
 124         e0 = n;
 125
 126       /* Transform these to the actual start and end numbers.  */
 127       s = (long)s0 * ws->incr + ws->next;
 128       e = (long)e0 * ws->incr + ws->next;
 129
 130       *pstart = s;
 131       *pend = e;
 132
 133       if (e0 == n)
 134         thr->ts.static_trip = -1;
 135       else
 136         thr->ts.static_trip++;
 137       return 0;
 138     }
 139 }
 140
 141
 142 /* This function implements the DYNAMIC scheduling method.  Arguments are
 143    as for gomp_iter_static_next.  This function must be called with ws->lock
 144    held.  */
 145
 146 bool
 147 gomp_iter_dynamic_next_locked (long *pstart, long *pend)
 148 {
 149   struct gomp_thread *thr = gomp_thread ();
 150   struct gomp_work_share *ws = thr->ts.work_share;
 151   long start, end, chunk, left;
 152
 153   start = ws->next;
 154   if (start == ws->end)
 155     return false;
 156
 157   chunk = ws->chunk_size * ws->incr;
 158   left = ws->end - start;
 159   if (ws->incr < 0)
 160     {
 161       if (chunk < left)
 162         chunk = left;
 163     }
 164   else
 165     {
 166       if (chunk > left)
 167         chunk = left;
 168     }
 169   end = start + chunk;
 170
 171   ws->next = end;
 172   *pstart = start;
 173   *pend = end;
 174   return true;
 175 }
 176
 177
 178 #ifdef HAVE_SYNC_BUILTINS
 179 /* Similar, but doesn't require the lock held, and uses compare-and-swap
 180    instead.  Note that the only memory value that changes is ws->next.  */
 181
 182 bool
 183 gomp_iter_dynamic_next (long *pstart, long *pend)
 184 {
 185   struct gomp_thread *thr = gomp_thread ();
 186   struct gomp_work_share *ws = thr->ts.work_share;
 187   long start, end, nend, chunk, incr;
 188
 189   start = ws->next;
 190   end = ws->end;
 191   incr = ws->incr;
 192   chunk = ws->chunk_size * incr;
 193
 194   while (1)
 195     {
 196       long left = end - start;
 197       long tmp;
 198
 199       if (start == end)
 200         return false;
 201
 202       if (incr < 0)
 203         {
 204           if (chunk < left)
 205             chunk = left;
 206         }
 207       else
 208         {
 209           if (chunk > left)
 210             chunk = left;
 211         }
 212       nend = start + chunk;
 213
 214       tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
 215       if (__builtin_expect (tmp == start, 1))
 216         break;
 217
 218       start = tmp;
 219     }
 220
 221   *pstart = start;
 222   *pend = nend;
 223   return true;
 224 }
 225 #endif /* HAVE_SYNC_BUILTINS */
 226
 227
 228 /* This function implements the GUIDED scheduling method.  Arguments are
 229    as for gomp_iter_static_next.  This function must be called with the
 230    work share lock held.  */
 231
 232 bool
 233 gomp_iter_guided_next_locked (long *pstart, long *pend)
 234 {
 235   struct gomp_thread *thr = gomp_thread ();
 236   struct gomp_work_share *ws = thr->ts.work_share;
 237   struct gomp_team *team = thr->ts.team;
 238   unsigned long nthreads = team ? team->nthreads : 1;
 239   unsigned long n, q;
 240   long start, end;
 241
 242   if (ws->next == ws->end)
 243     return false;
 244
 245   n = (ws->end - ws->next) / ws->incr;
 246   q = (n + nthreads - 1) / nthreads;
 247
 248   if (q < ws->chunk_size)
 249     q = ws->chunk_size;
 250   if (q > n)
 251     q = n;
 252
 253   start = ws->next;
 254   end = start + q * ws->incr;
 255
 256   ws->next = end;
 257   *pstart = start;
 258   *pend = end;
 259   return true;
 260 }
 261
 262 #ifdef HAVE_SYNC_BUILTINS
 263 /* Similar, but doesn't require the lock held, and uses compare-and-swap
 264    instead.  Note that the only memory value that changes is ws->next.  */
 265
 266 bool
 267 gomp_iter_guided_next (long *pstart, long *pend)
 268 {
 269   struct gomp_thread *thr = gomp_thread ();
 270   struct gomp_work_share *ws = thr->ts.work_share;
 271   struct gomp_team *team = thr->ts.team;
 272   unsigned long nthreads = team ? team->nthreads : 1;
 273   long start, end, nend, incr;
 274   unsigned long chunk_size;
 275
 276   start = ws->next;
 277   end = ws->end;
 278   incr = ws->incr;
 279   chunk_size = ws->chunk_size;
 280
 281   while (1)
 282     {
 283       unsigned long n, q;
 284       long tmp;
 285
 286       if (start == end)
 287         return false;
 288
 289       n = (end - start) / ws->incr;
 290       q = (n + nthreads - 1) / nthreads;
 291
 292       if (q < chunk_size)
 293         q = chunk_size;
 294       if (q > n)
 295         q = n;
 296
 297       nend = start + q * incr;
 298
 299       tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
 300       if (__builtin_expect (tmp == start, 1))
 301         break;
 302
 303       start = tmp;
 304     }
 305
 306   *pstart = start;
 307   *pend = nend;
 308   return true;
 309 }
 310 #endif /* HAVE_SYNC_BUILTINS */