/* Copyright (C) 2005-2013 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains routines for managing work-share iteration, both
   for loops and sections.  */

#include "libgomp.h"
#include <stdlib.h>

/* This function implements the STATIC scheduling method.  The caller should
   iterate *pstart <= x < *pend.  Return zero if there are more iterations
   to perform; nonzero if not.  Return less than 0 if this thread had
   received the absolutely last iteration.  */

int
gomp_iter_static_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned long nthreads = team ? team->nthreads : 1;

  if (thr->ts.static_trip == -1)
    return -1;

  /* Quick test for degenerate teams and orphaned constructs.  */
  if (nthreads == 1)
    {
      *pstart = ws->next;
      *pend = ws->end;
      thr->ts.static_trip = -1;
      return ws->next == ws->end;
    }

  /* We interpret chunk_size zero as "unspecified", which means that we
     should break up the iterations such that each thread makes only one
     trip through the outer loop.  */
  if (ws->chunk_size == 0)
    {
      unsigned long n, q, i, t;
      unsigned long s0, e0;
      long s, e;

      if (thr->ts.static_trip > 0)
        return 1;

      /* Compute the total number of iterations.  */
      s = ws->incr + (ws->incr > 0 ? -1 : 1);
      n = (ws->end - ws->next + s) / ws->incr;
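      /* Note: biasing by incr - 1 (or incr + 1 for a negative step)
         before dividing rounds the count up, so a final partial stride
         still counts as one iteration; e.g. next = 0, end = 10,
         incr = 3 gives n = (10 + 2) / 3 = 4.  */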
      i = thr->ts.team_id;

      /* Compute the "zero-based" start and end points.  That is, as
         if the loop began at zero and incremented by one.  */
      q = n / nthreads;
      t = n % nthreads;
      if (i < t)
        {
          t = 0;
          q++;
        }
      s0 = q * i + t;
      e0 = s0 + q;
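      /* Worked example: n = 7, nthreads = 3 gives q = 2, t = 1, so
         thread 0 takes [0, 3) while threads 1 and 2 take [3, 5) and
         [5, 7) -- the first n % nthreads threads get one extra
         iteration each.  */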

      /* Notice when no iterations allocated for this thread.  */
      if (s0 >= e0)
        {
          thr->ts.static_trip = 1;
          return 1;
        }

      /* Transform these to the actual start and end numbers.  */
      s = (long)s0 * ws->incr + ws->next;
      e = (long)e0 * ws->incr + ws->next;

      *pstart = s;
      *pend = e;
      thr->ts.static_trip = (e0 == n ? -1 : 1);
      return 0;
    }
  else
    {
      unsigned long n, s0, e0, i, c;
      long s, e;

      /* Otherwise, each thread gets exactly chunk_size iterations
         (if available) each time through the loop.  */

      s = ws->incr + (ws->incr > 0 ? -1 : 1);
      n = (ws->end - ws->next + s) / ws->incr;
      i = thr->ts.team_id;
      c = ws->chunk_size;

      /* Initial guess is a C sized chunk positioned nthreads iterations
         in, offset by our thread number.  */
      s0 = (thr->ts.static_trip * nthreads + i) * c;
      e0 = s0 + c;
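      /* On each trip the whole team advances by nthreads * c
         zero-based iterations; e.g. with c = 2 and nthreads = 3,
         thread 1 claims [2, 4) on trip 0 and [8, 10) on trip 1.  */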

      /* Detect overflow.  */
      if (s0 >= n)
        return 1;
      if (e0 > n)
        e0 = n;

      /* Transform these to the actual start and end numbers.  */
      s = (long)s0 * ws->incr + ws->next;
      e = (long)e0 * ws->incr + ws->next;

      *pstart = s;
      *pend = e;

      if (e0 == n)
        thr->ts.static_trip = -1;
      else
        thr->ts.static_trip++;
      return 0;
    }
}
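
/* A typical caller drives this function with a loop of the following
   shape (a sketch only, not the verbatim call sites in loop.c):

     long s, e;
     while (gomp_iter_static_next (&s, &e) == 0)
       {
         long x;
         for (x = s; x != e; x += incr)
           ... loop body ...
       }

   where incr is the work share's increment; the x != e test is safe
   because e - s is always a multiple of the increment.  */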

/* This function implements the DYNAMIC scheduling method.  Arguments are
   as for gomp_iter_static_next.  This function must be called with ws->lock
   held.  */

bool
gomp_iter_dynamic_next_locked (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  long start, end, chunk, left;

  start = ws->next;
  if (start == ws->end)
    return false;

  chunk = ws->chunk_size;
  left = ws->end - start;
  if (ws->incr < 0)
    {
      if (chunk < left)
        chunk = left;
    }
  else
    {
      if (chunk > left)
        chunk = left;
    }
  end = start + chunk;
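
  /* For dynamic scheduling ws->chunk_size is stored pre-scaled by the
     increment, so with a negative step both chunk and left are negative
     here and the comparisons above clamp the chunk so that it does not
     run past ws->end.  */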

  ws->next = end;
  *pstart = start;
  *pend = end;
  return true;
}

#ifdef HAVE_SYNC_BUILTINS
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next.  */

bool
gomp_iter_dynamic_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  long start, end, nend, chunk, incr;

  end = ws->end;
  incr = ws->incr;
  chunk = ws->chunk_size;

  if (__builtin_expect (ws->mode, 1))
    {
      long tmp = __sync_fetch_and_add (&ws->next, chunk);
      if (incr > 0)
        {
          if (tmp >= end)
            return false;
          nend = tmp + chunk;
          if (nend > end)
            nend = end;
          *pstart = tmp;
          *pend = nend;
          return true;
        }
      else
        {
          if (tmp <= end)
            return false;
          nend = tmp + chunk;
          if (nend < end)
            nend = end;
          *pstart = tmp;
          *pend = nend;
          return true;
        }
    }
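
  /* Fallback for ws->mode == 0: a bare fetch-and-add could run
     ws->next arbitrarily far past ws->end and, for iteration ranges
     near the limits of long, overflow.  (The mode flag is set at
     work-share initialization when that cannot happen.)  So clamp the
     chunk to what is left and publish it with compare-and-swap
     instead.  */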
  start = ws->next;
  while (1)
    {
      long left = end - start;
      long tmp;

      if (start == end)
        return false;

      if (incr < 0)
        {
          if (chunk < left)
            chunk = left;
        }
      else
        {
          if (chunk > left)
            chunk = left;
        }
      nend = start + chunk;

      tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
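      /* tmp is the value that was actually in ws->next: if it still
         equals start, we won the race and our chunk is published;
         otherwise another thread advanced ws->next first, so retry
         from the value it left behind.  */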
      if (__builtin_expect (tmp == start, 1))
        break;

      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
#endif /* HAVE_SYNC_BUILTINS */
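
/* Callers such as loop.c pick between the lock-free and the locked
   variant depending on HAVE_SYNC_BUILTINS; roughly (a sketch, not the
   verbatim source):

     #ifdef HAVE_SYNC_BUILTINS
       ret = gomp_iter_dynamic_next (istart, iend);
     #else
       gomp_mutex_lock (&thr->ts.work_share->lock);
       ret = gomp_iter_dynamic_next_locked (istart, iend);
       gomp_mutex_unlock (&thr->ts.work_share->lock);
     #endif
*/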

/* This function implements the GUIDED scheduling method.  Arguments are
   as for gomp_iter_static_next.  This function must be called with the
   work share lock held.  */

bool
gomp_iter_guided_next_locked (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  unsigned long nthreads = team ? team->nthreads : 1;
  unsigned long n, q;
  long start, end;

  if (ws->next == ws->end)
    return false;

  start = ws->next;
  n = (ws->end - start) / ws->incr;
  q = (n + nthreads - 1) / nthreads;
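
  /* Each grab takes roughly 1/nthreads of the remaining iterations,
     rounded up, so chunks shrink geometrically: with nthreads = 4 and
     n = 100 remaining this yields q = 25; a thread arriving at n = 75
     gets q = 19, and so on down to chunk_size.  */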
  if (q < ws->chunk_size)
    q = ws->chunk_size;
  if (q <= n)
    end = start + q * ws->incr;
  else
    end = ws->end;

  ws->next = end;
  *pstart = start;
  *pend = end;
  return true;
}

#ifdef HAVE_SYNC_BUILTINS
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next.  */

bool
gomp_iter_guided_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  unsigned long nthreads = team ? team->nthreads : 1;
  long start, end, nend, incr;
  unsigned long chunk_size;

  start = ws->next;
  end = ws->end;
  incr = ws->incr;
  chunk_size = ws->chunk_size;

  while (1)
    {
      unsigned long n, q;
      long tmp;

      if (start == end)
        return false;

      n = (end - start) / incr;
      q = (n + nthreads - 1) / nthreads;

      if (q < chunk_size)
        q = chunk_size;
      if (__builtin_expect (q <= n, 1))
        nend = start + q * incr;
      else
        nend = end;

      tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
      if (__builtin_expect (tmp == start, 1))
        break;

      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
#endif /* HAVE_SYNC_BUILTINS */