/* Copyright (C) 2005-2018 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the LOOP (FOR/DO) construct.  */

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "libgomp.h"

ialias (GOMP_loop_runtime_next)
ialias_redirect (GOMP_taskgroup_reduction_register)

/* Initialize the given work share construct from the given arguments.  */

static inline void
gomp_loop_init (struct gomp_work_share *ws, long start, long end, long incr,
                enum gomp_schedule_type sched, long chunk_size)
{
  ws->sched = sched;
  ws->chunk_size = chunk_size;
  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  ws->end = ((incr > 0 && start > end) || (incr < 0 && start < end))
            ? start : end;
  ws->incr = incr;
  ws->next = start;
  if (sched == GFS_DYNAMIC)
    {
      ws->chunk_size *= incr;

#ifdef HAVE_SYNC_BUILTINS
      {
        /* For dynamic scheduling prepare things to make each iteration
           faster.  */
        struct gomp_thread *thr = gomp_thread ();
        struct gomp_team *team = thr->ts.team;
        long nthreads = team ? team->nthreads : 1;

        if (__builtin_expect (incr > 0, 1))
          {
            /* Cheap overflow protection.  */
            if (__builtin_expect ((nthreads | ws->chunk_size)
                                  >= 1UL << (sizeof (long)
                                             * __CHAR_BIT__ / 2 - 1), 0))
              ws->mode = 0;
            else
              ws->mode = ws->end < (LONG_MAX
                                    - (nthreads + 1) * ws->chunk_size);
          }
        /* Cheap overflow protection.  */
        else if (__builtin_expect ((nthreads | -ws->chunk_size)
                                   >= 1UL << (sizeof (long)
                                              * __CHAR_BIT__ / 2 - 1), 0))
          ws->mode = 0;
        else
          ws->mode = ws->end > (nthreads + 1) * -ws->chunk_size - LONG_MAX;
      }
#endif
    }
}

/* The *_start routines are called when first encountering a loop construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see the construct exists and allocate work from it.

   START, END, INCR are the bounds of the loop; due to the restrictions of
   OpenMP, these values must be the same in every thread.  This is not
   verified (nor is it entirely verifiable, since START is not necessarily
   retained intact in the work-share data structure).  CHUNK_SIZE is the
   scheduling parameter; again this must be identical in all threads.

   Returns true if there's any work for this thread to perform.  If so,
   *ISTART and *IEND are filled with the bounds of the iteration block
   allocated to this thread.  Returns false if all work was assigned to
   other threads prior to this thread's arrival.  */

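/* As an illustrative sketch only (not part of libgomp; N and body () are
   hypothetical), a compiler might lower a bare
   "#pragma omp for schedule(dynamic, 4)" into this call protocol:

       long istart, iend;
       if (GOMP_loop_dynamic_start (0, N, 1, 4, &istart, &iend))
         do
           {
             for (long i = istart; i < iend; i++)
               body (i);
           }
         while (GOMP_loop_dynamic_next (&istart, &iend));
       GOMP_loop_end ();

   Each thread keeps requesting iteration blocks until a *_next call
   returns false, then joins the barrier in GOMP_loop_end.  */
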
static bool
gomp_loop_static_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_STATIC, chunk_size);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

/* The current dynamic implementation is always monotonic.  The
   entrypoints without nonmonotonic in them have to be always monotonic,
   but the nonmonotonic ones could be changed to use work-stealing for
   improved scalability.  */

static bool
gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size,
                         long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_DYNAMIC, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

/* As for dynamic; the open question is how the chunk sizes could be
   decreased without central locking or atomics.  */

static bool
gomp_loop_guided_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_GUIDED, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_runtime_start (long start, long end, long incr,
                         long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_static_start (start, end, incr,
                                     icv->run_sched_chunk_size,
                                     istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_dynamic_start (start, end, incr,
                                      icv->run_sched_chunk_size,
                                      istart, iend);
    case GFS_GUIDED:
      return gomp_loop_guided_start (start, end, incr,
                                     icv->run_sched_chunk_size,
                                     istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
         driven choice.  */
      return gomp_loop_static_start (start, end, incr, 0, istart, iend);
    default:
      abort ();
    }
}

static long
gomp_adjust_sched (long sched, long *chunk_size)
{
  sched &= ~GFS_MONOTONIC;
  switch (sched)
    {
    case GFS_STATIC:
    case GFS_DYNAMIC:
    case GFS_GUIDED:
      return sched;
    /* GFS_RUNTIME is used for runtime schedule without monotonic
       or nonmonotonic modifiers on the clause.
       GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
       modifier.  */
    case GFS_RUNTIME:
    /* GFS_AUTO is used for runtime schedule with nonmonotonic
       modifier.  */
    case GFS_AUTO:
      {
        struct gomp_task_icv *icv = gomp_icv (false);
        sched = icv->run_sched_var & ~GFS_MONOTONIC;
        switch (sched)
          {
          case GFS_STATIC:
          case GFS_DYNAMIC:
          case GFS_GUIDED:
            *chunk_size = icv->run_sched_chunk_size;
            break;
          case GFS_AUTO:
            *chunk_size = 0;
            break;
          default:
            abort ();
          }
        return sched;
      }
    default:
      abort ();
    }
}

bool
GOMP_loop_start (long start, long end, long incr, long sched,
                 long chunk_size, long *istart, long *iend,
                 uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      sched, chunk_size);
      if (reductions)
        {
          GOMP_taskgroup_reduction_register (reductions);
          thr->task->taskgroup->workshare = true;
          thr->ts.work_share->task_reductions = reductions;
        }
      if (mem)
        {
          uintptr_t size = (uintptr_t) *mem;
          if (size > (sizeof (struct gomp_work_share)
                      - offsetof (struct gomp_work_share,
                                  inline_ordered_team_ids)))
            thr->ts.work_share->ordered_team_ids
              = gomp_malloc_cleared (size);
          else
            memset (thr->ts.work_share->ordered_team_ids, '\0', size);
          *mem = (void *) thr->ts.work_share->ordered_team_ids;
        }
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
        {
          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
          gomp_workshare_task_reduction_register (reductions,
                                                  first_reductions);
        }
      if (mem)
        *mem = (void *) thr->ts.work_share->ordered_team_ids;
    }

  if (!istart)
    return true;
  return ialias_call (GOMP_loop_runtime_next) (istart, iend);
}

/* The *_ordered_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED section.  */

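/* A hypothetical sketch (N, before () and in_order () are stand-ins, not
   part of libgomp): "#pragma omp for ordered schedule(dynamic)" could
   expand to

       long istart, iend;
       if (GOMP_loop_ordered_dynamic_start (0, N, 1, 0, &istart, &iend))
         do
           {
             for (long i = istart; i < iend; i++)
               {
                 before (i);
                 GOMP_ordered_start ();
                 in_order (i);
                 GOMP_ordered_end ();
               }
           }
         while (GOMP_loop_ordered_dynamic_next (&istart, &iend));
       GOMP_loop_end ();  */
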
static bool
gomp_loop_ordered_static_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (1))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_STATIC, chunk_size);
      gomp_ordered_static_init ();
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_ordered_dynamic_start (long start, long end, long incr,
                                 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_DYNAMIC, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ordered_guided_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_GUIDED, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ordered_runtime_start (long start, long end, long incr,
                                 long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ordered_static_start (start, end, incr,
                                             icv->run_sched_chunk_size,
                                             istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ordered_dynamic_start (start, end, incr,
                                              icv->run_sched_chunk_size,
                                              istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ordered_guided_start (start, end, incr,
                                             icv->run_sched_chunk_size,
                                             istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
         driven choice.  */
      return gomp_loop_ordered_static_start (start, end, incr,
                                             0, istart, iend);
    default:
      abort ();
    }
}

bool
GOMP_loop_ordered_start (long start, long end, long incr, long sched,
                         long chunk_size, long *istart, long *iend,
                         uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();
  size_t ordered = 1;
  bool ret;

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (mem)
    ordered += (uintptr_t) *mem;
  if (gomp_work_share_start (ordered))
    {
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      sched, chunk_size);
      if (reductions)
        {
          GOMP_taskgroup_reduction_register (reductions);
          thr->task->taskgroup->workshare = true;
          thr->ts.work_share->task_reductions = reductions;
        }
      if (sched == GFS_STATIC)
        gomp_ordered_static_init ();
      else
        gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
        {
          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
          gomp_workshare_task_reduction_register (reductions,
                                                  first_reductions);
        }
      sched = thr->ts.work_share->sched;
      if (sched != GFS_STATIC)
        gomp_mutex_lock (&thr->ts.work_share->lock);
    }

  if (mem)
    {
      uintptr_t p
        = (uintptr_t) (thr->ts.work_share->ordered_team_ids
                       + (thr->ts.team ? thr->ts.team->nthreads : 1));
      p += __alignof__ (long long) - 1;
      p &= ~(__alignof__ (long long) - 1);
      *mem = (void *) p;
    }

  switch (sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return !gomp_iter_static_next (istart, iend);
    case GFS_DYNAMIC:
      ret = gomp_iter_dynamic_next_locked (istart, iend);
      break;
    case GFS_GUIDED:
      ret = gomp_iter_guided_next_locked (istart, iend);
      break;
    default:
      abort ();
    }

  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);
  return ret;
}

/* The *_doacross_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
   section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
   and the other COUNTS array elements tell the library the number of
   iterations in the ordered inner loops.  */

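/* For illustration only, with hypothetical bounds M and N: a nest such as

       #pragma omp for ordered(2) schedule(static)
       for (i = 0; i < M; i++)
         for (j = 0; j < N; j++)
           ...

   would be lowered so that COUNTS = { M, N } describes the whole nest
   while the work share distributes only the outer 0 .. M-1 dimension,
   e.g.

       long counts[2] = { M, N };
       GOMP_loop_doacross_static_start (2, counts, 0, &istart, &iend);

   and the depend(source)/depend(sink: ...) clauses map onto
   GOMP_doacross_post and GOMP_doacross_wait.  The exact lowering is
   compiler-dependent; this is a sketch.  */
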
static bool
gomp_loop_doacross_static_start (unsigned ncounts, long *counts,
                                 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
                      GFS_STATIC, chunk_size);
      gomp_doacross_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
                                  long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
                      GFS_DYNAMIC, chunk_size);
      gomp_doacross_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_doacross_guided_start (unsigned ncounts, long *counts,
                                 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
                      GFS_GUIDED, chunk_size);
      gomp_doacross_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_doacross_runtime_start (unsigned ncounts, long *counts,
                                  long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_doacross_static_start (ncounts, counts,
                                              icv->run_sched_chunk_size,
                                              istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_doacross_dynamic_start (ncounts, counts,
                                               icv->run_sched_chunk_size,
                                               istart, iend);
    case GFS_GUIDED:
      return gomp_loop_doacross_guided_start (ncounts, counts,
                                              icv->run_sched_chunk_size,
                                              istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
         driven choice.  */
      return gomp_loop_doacross_static_start (ncounts, counts,
                                              0, istart, iend);
    default:
      abort ();
    }
}

bool
GOMP_loop_doacross_start (unsigned ncounts, long *counts, long sched,
                          long chunk_size, long *istart, long *iend,
                          uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      size_t extra = 0;
      if (mem)
        extra = (uintptr_t) *mem;
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
                      sched, chunk_size);
      gomp_doacross_init (ncounts, counts, chunk_size, extra);
      if (reductions)
        {
          GOMP_taskgroup_reduction_register (reductions);
          thr->task->taskgroup->workshare = true;
          thr->ts.work_share->task_reductions = reductions;
        }
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
        {
          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
          gomp_workshare_task_reduction_register (reductions,
                                                  first_reductions);
        }
      sched = thr->ts.work_share->sched;
    }

  if (mem)
    *mem = thr->ts.work_share->doacross->extra;

  return ialias_call (GOMP_loop_runtime_next) (istart, iend);
}

/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before the parallel, in which case this may
   be the first iteration for the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

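/* Illustrative sketch: when the loop is bound directly to a parallel,
   the GOMP_parallel_loop_* entry points below pre-initialize the work
   share, so the outlined function (body () is a hypothetical stand-in)
   starts with a *_next call rather than a *_start call:

       long istart, iend;
       while (GOMP_loop_dynamic_next (&istart, &iend))
         for (long i = istart; i < iend; i++)
           body (i);
       GOMP_loop_end_nowait ();  */
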
static bool
gomp_loop_static_next (long *istart, long *iend)
{
  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_dynamic_next (long *istart, long *iend)
{
  bool ret;

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_guided_next (long *istart, long *iend)
{
  bool ret;

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_runtime_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_next routines are called when the thread completes
   processing of the iteration block currently assigned to it.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ordered_static_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  test = gomp_iter_static_next (istart, iend);
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return test == 0;
}

static bool
gomp_loop_ordered_dynamic_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ordered_guided_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ordered_runtime_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ordered_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ordered_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ordered_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The GOMP_parallel_loop_* routines pre-initialize a work-share construct
   to avoid one synchronization once we get into the loop.  */

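/* For example (a hypothetical lowering; sub () stands for the outlined
   loop body shown in the sketch above), "#pragma omp parallel for
   schedule(guided, 16)" might become a single call

       GOMP_parallel_loop_guided (sub, &data, 0, 0, N, 1, 16, 0);

   so the threads created for the parallel find the loop's work share
   already initialized and skip one synchronization.  */
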
static void
gomp_parallel_loop_start (void (*fn) (void *), void *data,
                          unsigned num_threads, long start, long end,
                          long incr, enum gomp_schedule_type sched,
                          long chunk_size, unsigned int flags)
{
  struct gomp_team *team;

  num_threads = gomp_resolve_num_threads (num_threads, 0);
  team = gomp_new_team (num_threads);
  gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
  gomp_team_start (fn, data, num_threads, flags, team, NULL);
}

void
GOMP_parallel_loop_static_start (void (*fn) (void *), void *data,
                                 unsigned num_threads, long start, long end,
                                 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_STATIC, chunk_size, 0);
}

void
GOMP_parallel_loop_dynamic_start (void (*fn) (void *), void *data,
                                  unsigned num_threads, long start, long end,
                                  long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_DYNAMIC, chunk_size, 0);
}

void
GOMP_parallel_loop_guided_start (void (*fn) (void *), void *data,
                                 unsigned num_threads, long start, long end,
                                 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_GUIDED, chunk_size, 0);
}

void
GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data,
                                  unsigned num_threads, long start, long end,
                                  long incr)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            icv->run_sched_var & ~GFS_MONOTONIC,
                            icv->run_sched_chunk_size, 0);
}

ialias_redirect (GOMP_parallel_end)

void
GOMP_parallel_loop_static (void (*fn) (void *), void *data,
                           unsigned num_threads, long start, long end,
                           long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_STATIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_dynamic (void (*fn) (void *), void *data,
                            unsigned num_threads, long start, long end,
                            long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_DYNAMIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_guided (void (*fn) (void *), void *data,
                           unsigned num_threads, long start, long end,
                           long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_GUIDED, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
                            unsigned num_threads, long start, long end,
                            long incr, unsigned flags)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            icv->run_sched_var & ~GFS_MONOTONIC,
                            icv->run_sched_chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(GOMP_parallel_loop_dynamic) GOMP_parallel_loop_nonmonotonic_dynamic
        __attribute__((alias ("GOMP_parallel_loop_dynamic")));
extern __typeof(GOMP_parallel_loop_guided) GOMP_parallel_loop_nonmonotonic_guided
        __attribute__((alias ("GOMP_parallel_loop_guided")));
extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_nonmonotonic_runtime
        __attribute__((alias ("GOMP_parallel_loop_runtime")));
extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_maybe_nonmonotonic_runtime
        __attribute__((alias ("GOMP_parallel_loop_runtime")));
#else

void
GOMP_parallel_loop_nonmonotonic_dynamic (void (*fn) (void *), void *data,
                                         unsigned num_threads, long start,
                                         long end, long incr, long chunk_size,
                                         unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_DYNAMIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_nonmonotonic_guided (void (*fn) (void *), void *data,
                                        unsigned num_threads, long start,
                                        long end, long incr, long chunk_size,
                                        unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_GUIDED, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_nonmonotonic_runtime (void (*fn) (void *), void *data,
                                         unsigned num_threads, long start,
                                         long end, long incr, unsigned flags)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            icv->run_sched_var & ~GFS_MONOTONIC,
                            icv->run_sched_chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_maybe_nonmonotonic_runtime (void (*fn) (void *), void *data,
                                               unsigned num_threads, long start,
                                               long end, long incr,
                                               unsigned flags)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            icv->run_sched_var & ~GFS_MONOTONIC,
                            icv->run_sched_chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}
#endif

/* The GOMP_loop_end* routines are called after the thread is told that
   all loop iterations are complete.  The first two versions synchronize
   all threads; the nowait version does not.  */

void
GOMP_loop_end (void)
{
  gomp_work_share_end ();
}

bool
GOMP_loop_end_cancel (void)
{
  return gomp_work_share_end_cancel ();
}

void
GOMP_loop_end_nowait (void)
{
  gomp_work_share_end_nowait ();
}

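/* Sketch of how the three variants are chosen (illustrative, not a
   definitive lowering): a plain worksharing loop ends with GOMP_loop_end,
   which provides the implicit barrier; a loop with the nowait clause ends
   with GOMP_loop_end_nowait; and in a cancellable region the compiler
   instead emits

       if (GOMP_loop_end_cancel ())
         goto cancelled;

   branching to the cancellation handling when cancellation was
   observed.  */
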
/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.  */

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(gomp_loop_static_start) GOMP_loop_static_start
        __attribute__((alias ("gomp_loop_static_start")));
extern __typeof(gomp_loop_dynamic_start) GOMP_loop_dynamic_start
        __attribute__((alias ("gomp_loop_dynamic_start")));
extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start
        __attribute__((alias ("gomp_loop_guided_start")));
extern __typeof(gomp_loop_dynamic_start) GOMP_loop_nonmonotonic_dynamic_start
        __attribute__((alias ("gomp_loop_dynamic_start")));
extern __typeof(gomp_loop_guided_start) GOMP_loop_nonmonotonic_guided_start
        __attribute__((alias ("gomp_loop_guided_start")));
extern __typeof(GOMP_loop_runtime_start) GOMP_loop_nonmonotonic_runtime_start
        __attribute__((alias ("GOMP_loop_runtime_start")));
extern __typeof(GOMP_loop_runtime_start) GOMP_loop_maybe_nonmonotonic_runtime_start
        __attribute__((alias ("GOMP_loop_runtime_start")));

extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start
        __attribute__((alias ("gomp_loop_ordered_static_start")));
extern __typeof(gomp_loop_ordered_dynamic_start) GOMP_loop_ordered_dynamic_start
        __attribute__((alias ("gomp_loop_ordered_dynamic_start")));
extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start
        __attribute__((alias ("gomp_loop_ordered_guided_start")));

extern __typeof(gomp_loop_doacross_static_start) GOMP_loop_doacross_static_start
        __attribute__((alias ("gomp_loop_doacross_static_start")));
extern __typeof(gomp_loop_doacross_dynamic_start) GOMP_loop_doacross_dynamic_start
        __attribute__((alias ("gomp_loop_doacross_dynamic_start")));
extern __typeof(gomp_loop_doacross_guided_start) GOMP_loop_doacross_guided_start
        __attribute__((alias ("gomp_loop_doacross_guided_start")));

extern __typeof(gomp_loop_static_next) GOMP_loop_static_next
        __attribute__((alias ("gomp_loop_static_next")));
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next
        __attribute__((alias ("gomp_loop_dynamic_next")));
extern __typeof(gomp_loop_guided_next) GOMP_loop_guided_next
        __attribute__((alias ("gomp_loop_guided_next")));
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_nonmonotonic_dynamic_next
        __attribute__((alias ("gomp_loop_dynamic_next")));
extern __typeof(gomp_loop_guided_next) GOMP_loop_nonmonotonic_guided_next
        __attribute__((alias ("gomp_loop_guided_next")));
extern __typeof(GOMP_loop_runtime_next) GOMP_loop_nonmonotonic_runtime_next
        __attribute__((alias ("GOMP_loop_runtime_next")));
extern __typeof(GOMP_loop_runtime_next) GOMP_loop_maybe_nonmonotonic_runtime_next
        __attribute__((alias ("GOMP_loop_runtime_next")));

extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next
        __attribute__((alias ("gomp_loop_ordered_static_next")));
extern __typeof(gomp_loop_ordered_dynamic_next) GOMP_loop_ordered_dynamic_next
        __attribute__((alias ("gomp_loop_ordered_dynamic_next")));
extern __typeof(gomp_loop_ordered_guided_next) GOMP_loop_ordered_guided_next
        __attribute__((alias ("gomp_loop_ordered_guided_next")));
#else

bool
GOMP_loop_static_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  return gomp_loop_static_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_dynamic_start (long start, long end, long incr, long chunk_size,
                         long *istart, long *iend)
{
  return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_guided_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_nonmonotonic_dynamic_start (long start, long end, long incr,
                                      long chunk_size, long *istart,
                                      long *iend)
{
  return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_nonmonotonic_guided_start (long start, long end, long incr,
                                     long chunk_size, long *istart, long *iend)
{
  return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_nonmonotonic_runtime_start (long start, long end, long incr,
                                      long *istart, long *iend)
{
  return GOMP_loop_runtime_start (start, end, incr, istart, iend);
}

bool
GOMP_loop_maybe_nonmonotonic_runtime_start (long start, long end, long incr,
                                            long *istart, long *iend)
{
  return GOMP_loop_runtime_start (start, end, incr, istart, iend);
}

bool
GOMP_loop_ordered_static_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_static_start (start, end, incr, chunk_size,
                                         istart, iend);
}

bool
GOMP_loop_ordered_dynamic_start (long start, long end, long incr,
                                 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_start (start, end, incr, chunk_size,
                                          istart, iend);
}

bool
GOMP_loop_ordered_guided_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_guided_start (start, end, incr, chunk_size,
                                         istart, iend);
}

bool
GOMP_loop_doacross_static_start (unsigned ncounts, long *counts,
                                 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_doacross_static_start (ncounts, counts, chunk_size,
                                          istart, iend);
}

bool
GOMP_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
                                  long chunk_size, long *istart, long *iend)
{
  return gomp_loop_doacross_dynamic_start (ncounts, counts, chunk_size,
                                           istart, iend);
}

bool
GOMP_loop_doacross_guided_start (unsigned ncounts, long *counts,
                                 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_doacross_guided_start (ncounts, counts, chunk_size,
                                          istart, iend);
}

bool
GOMP_loop_static_next (long *istart, long *iend)
{
  return gomp_loop_static_next (istart, iend);
}

bool
GOMP_loop_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_dynamic_next (istart, iend);
}

bool
GOMP_loop_guided_next (long *istart, long *iend)
{
  return gomp_loop_guided_next (istart, iend);
}

bool
GOMP_loop_nonmonotonic_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_dynamic_next (istart, iend);
}

bool
GOMP_loop_nonmonotonic_guided_next (long *istart, long *iend)
{
  return gomp_loop_guided_next (istart, iend);
}

bool
GOMP_loop_nonmonotonic_runtime_next (long *istart, long *iend)
{
  return GOMP_loop_runtime_next (istart, iend);
}

bool
GOMP_loop_maybe_nonmonotonic_runtime_next (long *istart, long *iend)
{
  return GOMP_loop_runtime_next (istart, iend);
}

bool
GOMP_loop_ordered_static_next (long *istart, long *iend)
{
  return gomp_loop_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ordered_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ordered_guided_next (long *istart, long *iend)
{
  return gomp_loop_ordered_guided_next (istart, iend);
}
#endif