libgomp/loop.c

   1 /* Copyright (C) 2005-2023 Free Software Foundation, Inc.
   2    Contributed by Richard Henderson <rth@redhat.com>.
   3
   4    This file is part of the GNU Offloading and Multi Processing Library
   5    (libgomp).
   6
   7    Libgomp is free software; you can redistribute it and/or modify it
   8    under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3, or (at your option)
  10    any later version.
  11
  12    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
  13    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  14    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  15    more details.
  16
  17    Under Section 7 of GPL version 3, you are granted additional
  18    permissions described in the GCC Runtime Library Exception, version
  19    3.1, as published by the Free Software Foundation.
  20
  21    You should have received a copy of the GNU General Public License and
  22    a copy of the GCC Runtime Library Exception along with this program;
  23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  24    <http://www.gnu.org/licenses/>.  */
  25
  26 /* This file handles the LOOP (FOR/DO) construct.  */
  27
  28 #include <limits.h>
  29 #include <stdlib.h>
  30 #include <string.h>
  31 #include "libgomp.h"
  32
  33
  /* Alias set-up through the ialias/ialias_redirect macros, which are not
     defined in this file (presumably they come from libgomp.h -- confirm).
     GOMP_loop_runtime_next is defined later in this file and is invoked
     via ialias_call from the GOMP_loop_start and GOMP_loop_doacross_start
     entry points; GOMP_taskgroup_reduction_register is called directly
     wherever the first thread registers task reductions.  */
  34 ialias (GOMP_loop_runtime_next)
  35 ialias_redirect (GOMP_taskgroup_reduction_register)
  36
  37 /* Initialize the given work share construct from the given arguments.  */
  38
  39 static inline void
  40 gomp_loop_init (struct gomp_work_share *ws, long start, long end, long incr,
  41                 enum gomp_schedule_type sched, long chunk_size)
  42 {
  43   ws->sched = sched;
  44   ws->chunk_size = chunk_size;
  45   /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  46   ws->end = ((incr > 0 && start > end) || (incr < 0 && start < end))
  47             ? start : end;
  48   ws->incr = incr;
  49   ws->next = start;
  50   if (sched == GFS_DYNAMIC)
  51     {
  52       ws->chunk_size *= incr;
  53
  54 #ifdef HAVE_SYNC_BUILTINS
  55       {
  56         /* For dynamic scheduling prepare things to make each iteration
  57            faster.  */
  58         struct gomp_thread *thr = gomp_thread ();
  59         struct gomp_team *team = thr->ts.team;
  60         long nthreads = team ? team->nthreads : 1;
  61
  62         if (__builtin_expect (incr > 0, 1))
  63           {
  64             /* Cheap overflow protection.  */
  65             if (__builtin_expect ((nthreads | ws->chunk_size)
  66                                   >= 1UL << (sizeof (long)
  67                                              * __CHAR_BIT__ / 2 - 1), 0))
  68               ws->mode = 0;
  69             else
  70               ws->mode = ws->end < (LONG_MAX
  71                                     - (nthreads + 1) * ws->chunk_size);
  72           }
  73         /* Cheap overflow protection.  */
  74         else if (__builtin_expect ((nthreads | -ws->chunk_size)
  75                                    >= 1UL << (sizeof (long)
  76                                               * __CHAR_BIT__ / 2 - 1), 0))
  77           ws->mode = 0;
  78         else
  79           ws->mode = ws->end > (nthreads + 1) * -ws->chunk_size - LONG_MAX;
  80       }
  81 #endif
  82     }
  83 }
  84
  85 /* The *_start routines are called when first encountering a loop construct
  86    that is not bound directly to a parallel construct.  The first thread
  87    that arrives will create the work-share construct; subsequent threads
  88    will see the construct exists and allocate work from it.
  89
  90    START, END, INCR are the bounds of the loop; due to the restrictions of
  91    OpenMP, these values must be the same in every thread.  This is not
  92    verified (nor is it entirely verifiable, since START is not necessarily
  93    retained intact in the work-share data structure).  CHUNK_SIZE is the
  94    scheduling parameter; again this must be identical in all threads.
  95
  96    Returns true if there's any work for this thread to perform.  If so,
  97    *ISTART and *IEND are filled with the bounds of the iteration block
  98    allocated to this thread.  Returns false if all work was assigned to
  99    other threads prior to this thread's arrival.  */
 100
 101 static bool
 102 gomp_loop_static_start (long start, long end, long incr, long chunk_size,
 103                         long *istart, long *iend)
 104 {
 105   struct gomp_thread *thr = gomp_thread ();
 106
 107   thr->ts.static_trip = 0;
 108   if (gomp_work_share_start (0))
 109     {
 110       gomp_loop_init (thr->ts.work_share, start, end, incr,
 111                       GFS_STATIC, chunk_size);
 112       gomp_work_share_init_done ();
 113     }
 114
 115   return !gomp_iter_static_next (istart, iend);
 116 }
 117
 118 /* The current dynamic implementation is always monotonic.  The
 119    entrypoints without nonmonotonic in them have to be always monotonic,
 120    but the nonmonotonic ones could be changed to use work-stealing for
 121    improved scalability.  */
 122
 123 static bool
 124 gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size,
 125                          long *istart, long *iend)
 126 {
 127   struct gomp_thread *thr = gomp_thread ();
 128   bool ret;
 129
 130   if (gomp_work_share_start (0))
 131     {
 132       gomp_loop_init (thr->ts.work_share, start, end, incr,
 133                       GFS_DYNAMIC, chunk_size);
 134       gomp_work_share_init_done ();
 135     }
 136
 137 #ifdef HAVE_SYNC_BUILTINS
 138   ret = gomp_iter_dynamic_next (istart, iend);
 139 #else
 140   gomp_mutex_lock (&thr->ts.work_share->lock);
 141   ret = gomp_iter_dynamic_next_locked (istart, iend);
 142   gomp_mutex_unlock (&thr->ts.work_share->lock);
 143 #endif
 144
 145   return ret;
 146 }
 147
 148 /* Similarly as for dynamic, though the question is how can the chunk sizes
 149    be decreased without a central locking or atomics.  */
 150
 151 static bool
 152 gomp_loop_guided_start (long start, long end, long incr, long chunk_size,
 153                         long *istart, long *iend)
 154 {
 155   struct gomp_thread *thr = gomp_thread ();
 156   bool ret;
 157
 158   if (gomp_work_share_start (0))
 159     {
 160       gomp_loop_init (thr->ts.work_share, start, end, incr,
 161                       GFS_GUIDED, chunk_size);
 162       gomp_work_share_init_done ();
 163     }
 164
 165 #ifdef HAVE_SYNC_BUILTINS
 166   ret = gomp_iter_guided_next (istart, iend);
 167 #else
 168   gomp_mutex_lock (&thr->ts.work_share->lock);
 169   ret = gomp_iter_guided_next_locked (istart, iend);
 170   gomp_mutex_unlock (&thr->ts.work_share->lock);
 171 #endif
 172
 173   return ret;
 174 }
 175
 176 bool
 177 GOMP_loop_runtime_start (long start, long end, long incr,
 178                          long *istart, long *iend)
 179 {
 180   struct gomp_task_icv *icv = gomp_icv (false);
 181   switch (icv->run_sched_var & ~GFS_MONOTONIC)
 182     {
 183     case GFS_STATIC:
 184       return gomp_loop_static_start (start, end, incr,
 185                                      icv->run_sched_chunk_size,
 186                                      istart, iend);
 187     case GFS_DYNAMIC:
 188       return gomp_loop_dynamic_start (start, end, incr,
 189                                       icv->run_sched_chunk_size,
 190                                       istart, iend);
 191     case GFS_GUIDED:
 192       return gomp_loop_guided_start (start, end, incr,
 193                                      icv->run_sched_chunk_size,
 194                                      istart, iend);
 195     case GFS_AUTO:
 196       /* For now map to schedule(static), later on we could play with feedback
 197          driven choice.  */
 198       return gomp_loop_static_start (start, end, incr, 0, istart, iend);
 199     default:
 200       abort ();
 201     }
 202 }
 203
 204 static long
 205 gomp_adjust_sched (long sched, long *chunk_size)
 206 {
 207   sched &= ~GFS_MONOTONIC;
 208   switch (sched)
 209     {
 210     case GFS_STATIC:
 211     case GFS_DYNAMIC:
 212     case GFS_GUIDED:
 213       return sched;
 214     /* GFS_RUNTIME is used for runtime schedule without monotonic
 215        or nonmonotonic modifiers on the clause.
 216        GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
 217        modifier.  */
 218     case GFS_RUNTIME:
 219     /* GFS_AUTO is used for runtime schedule with nonmonotonic
 220        modifier.  */
 221     case GFS_AUTO:
 222       {
 223         struct gomp_task_icv *icv = gomp_icv (false);
 224         sched = icv->run_sched_var & ~GFS_MONOTONIC;
 225         switch (sched)
 226           {
 227           case GFS_STATIC:
 228           case GFS_DYNAMIC:
 229           case GFS_GUIDED:
 230             *chunk_size = icv->run_sched_chunk_size;
 231             break;
 232           case GFS_AUTO:
 233             sched = GFS_STATIC;
 234             *chunk_size = 0;
 235             break;
 236           default:
 237             abort ();
 238           }
 239         return sched;
 240       }
 241     default:
 242       abort ();
 243     }
 244 }
 245
 246 bool
 247 GOMP_loop_start (long start, long end, long incr, long sched,
 248                  long chunk_size, long *istart, long *iend,
 249                  uintptr_t *reductions, void **mem)
 250 {
 251   struct gomp_thread *thr = gomp_thread ();
 252
 253   thr->ts.static_trip = 0;
 254   if (reductions)
 255     gomp_workshare_taskgroup_start ();
 256   if (gomp_work_share_start (0))
 257     {
 258       sched = gomp_adjust_sched (sched, &chunk_size);
 259       gomp_loop_init (thr->ts.work_share, start, end, incr,
 260                       sched, chunk_size);
 261       if (reductions)
 262         {
 263           GOMP_taskgroup_reduction_register (reductions);
 264           thr->task->taskgroup->workshare = true;
 265           thr->ts.work_share->task_reductions = reductions;
 266         }
 267       if (mem)
 268         {
 269           uintptr_t size = (uintptr_t) *mem;
 270 #define INLINE_ORDERED_TEAM_IDS_OFF \
 271   ((offsetof (struct gomp_work_share, inline_ordered_team_ids)          \
 272     + __alignof__ (long long) - 1) & ~(__alignof__ (long long) - 1))
 273           if (sizeof (struct gomp_work_share)
 274               <= INLINE_ORDERED_TEAM_IDS_OFF
 275               || __alignof__ (struct gomp_work_share) < __alignof__ (long long)
 276               || size > (sizeof (struct gomp_work_share)
 277                         - INLINE_ORDERED_TEAM_IDS_OFF))
 278             *mem
 279               = (void *) (thr->ts.work_share->ordered_team_ids
 280                           = gomp_malloc_cleared (size));
 281           else
 282             *mem = memset (((char *) thr->ts.work_share)
 283                            + INLINE_ORDERED_TEAM_IDS_OFF, '\0', size);
 284         }
 285       gomp_work_share_init_done ();
 286     }
 287   else
 288     {
 289       if (reductions)
 290         {
 291           uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
 292           gomp_workshare_task_reduction_register (reductions,
 293                                                   first_reductions);
 294         }
 295       if (mem)
 296         {
 297           if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
 298                & (__alignof__ (long long) - 1)) == 0)
 299             *mem = (void *) thr->ts.work_share->ordered_team_ids;
 300           else
 301             {
 302               uintptr_t p = (uintptr_t) thr->ts.work_share->ordered_team_ids;
 303               p += __alignof__ (long long) - 1;
 304               p &= ~(__alignof__ (long long) - 1);
 305               *mem = (void *) p;
 306             }
 307         }
 308     }
 309
 310   if (!istart)
 311     return true;
 312   return ialias_call (GOMP_loop_runtime_next) (istart, iend);
 313 }
 314
 315 /* The *_ordered_*_start routines are similar.  The only difference is that
 316    this work-share construct is initialized to expect an ORDERED section.  */
 317
 318 static bool
 319 gomp_loop_ordered_static_start (long start, long end, long incr,
 320                                 long chunk_size, long *istart, long *iend)
 321 {
 322   struct gomp_thread *thr = gomp_thread ();
 323
 324   thr->ts.static_trip = 0;
 325   if (gomp_work_share_start (1))
 326     {
 327       gomp_loop_init (thr->ts.work_share, start, end, incr,
 328                       GFS_STATIC, chunk_size);
 329       gomp_ordered_static_init ();
 330       gomp_work_share_init_done ();
 331     }
 332
 333   return !gomp_iter_static_next (istart, iend);
 334 }
 335
 336 static bool
 337 gomp_loop_ordered_dynamic_start (long start, long end, long incr,
 338                                  long chunk_size, long *istart, long *iend)
 339 {
 340   struct gomp_thread *thr = gomp_thread ();
 341   bool ret;
 342
 343   if (gomp_work_share_start (1))
 344     {
 345       gomp_loop_init (thr->ts.work_share, start, end, incr,
 346                       GFS_DYNAMIC, chunk_size);
 347       gomp_mutex_lock (&thr->ts.work_share->lock);
 348       gomp_work_share_init_done ();
 349     }
 350   else
 351     gomp_mutex_lock (&thr->ts.work_share->lock);
 352
 353   ret = gomp_iter_dynamic_next_locked (istart, iend);
 354   if (ret)
 355     gomp_ordered_first ();
 356   gomp_mutex_unlock (&thr->ts.work_share->lock);
 357
 358   return ret;
 359 }
 360
 361 static bool
 362 gomp_loop_ordered_guided_start (long start, long end, long incr,
 363                                 long chunk_size, long *istart, long *iend)
 364 {
 365   struct gomp_thread *thr = gomp_thread ();
 366   bool ret;
 367
 368   if (gomp_work_share_start (1))
 369     {
 370       gomp_loop_init (thr->ts.work_share, start, end, incr,
 371                       GFS_GUIDED, chunk_size);
 372       gomp_mutex_lock (&thr->ts.work_share->lock);
 373       gomp_work_share_init_done ();
 374     }
 375   else
 376     gomp_mutex_lock (&thr->ts.work_share->lock);
 377
 378   ret = gomp_iter_guided_next_locked (istart, iend);
 379   if (ret)
 380     gomp_ordered_first ();
 381   gomp_mutex_unlock (&thr->ts.work_share->lock);
 382
 383   return ret;
 384 }
 385
 386 bool
 387 GOMP_loop_ordered_runtime_start (long start, long end, long incr,
 388                                  long *istart, long *iend)
 389 {
 390   struct gomp_task_icv *icv = gomp_icv (false);
 391   switch (icv->run_sched_var & ~GFS_MONOTONIC)
 392     {
 393     case GFS_STATIC:
 394       return gomp_loop_ordered_static_start (start, end, incr,
 395                                              icv->run_sched_chunk_size,
 396                                              istart, iend);
 397     case GFS_DYNAMIC:
 398       return gomp_loop_ordered_dynamic_start (start, end, incr,
 399                                               icv->run_sched_chunk_size,
 400                                               istart, iend);
 401     case GFS_GUIDED:
 402       return gomp_loop_ordered_guided_start (start, end, incr,
 403                                              icv->run_sched_chunk_size,
 404                                              istart, iend);
 405     case GFS_AUTO:
 406       /* For now map to schedule(static), later on we could play with feedback
 407          driven choice.  */
 408       return gomp_loop_ordered_static_start (start, end, incr,
 409                                              0, istart, iend);
 410     default:
 411       abort ();
 412     }
 413 }
 414
 415 bool
 416 GOMP_loop_ordered_start (long start, long end, long incr, long sched,
 417                          long chunk_size, long *istart, long *iend,
 418                          uintptr_t *reductions, void **mem)
 419 {
 420   struct gomp_thread *thr = gomp_thread ();
 421   size_t ordered = 1;
 422   bool ret;
 423
 424   thr->ts.static_trip = 0;
 425   if (reductions)
 426     gomp_workshare_taskgroup_start ();
 427   if (mem)
 428     ordered += (uintptr_t) *mem;
 429   if (gomp_work_share_start (ordered))
 430     {
 431       sched = gomp_adjust_sched (sched, &chunk_size);
 432       gomp_loop_init (thr->ts.work_share, start, end, incr,
 433                       sched, chunk_size);
 434       if (reductions)
 435         {
 436           GOMP_taskgroup_reduction_register (reductions);
 437           thr->task->taskgroup->workshare = true;
 438           thr->ts.work_share->task_reductions = reductions;
 439         }
 440       if (sched == GFS_STATIC)
 441         gomp_ordered_static_init ();
 442       else
 443         gomp_mutex_lock (&thr->ts.work_share->lock);
 444       gomp_work_share_init_done ();
 445     }
 446   else
 447     {
 448       if (reductions)
 449         {
 450           uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
 451           gomp_workshare_task_reduction_register (reductions,
 452                                                   first_reductions);
 453         }
 454       sched = thr->ts.work_share->sched;
 455       if (sched != GFS_STATIC)
 456         gomp_mutex_lock (&thr->ts.work_share->lock);
 457     }
 458
 459   if (mem)
 460     {
 461       uintptr_t p
 462         = (uintptr_t) (thr->ts.work_share->ordered_team_ids
 463                        + (thr->ts.team ? thr->ts.team->nthreads : 1));
 464       p += __alignof__ (long long) - 1;
 465       p &= ~(__alignof__ (long long) - 1);
 466       *mem = (void *) p;
 467     }
 468
 469   switch (sched)
 470     {
 471     case GFS_STATIC:
 472     case GFS_AUTO:
 473       return !gomp_iter_static_next (istart, iend);
 474     case GFS_DYNAMIC:
 475       ret = gomp_iter_dynamic_next_locked (istart, iend);
 476       break;
 477     case GFS_GUIDED:
 478       ret = gomp_iter_guided_next_locked (istart, iend);
 479       break;
 480     default:
 481       abort ();
 482     }
 483
 484   if (ret)
 485     gomp_ordered_first ();
 486   gomp_mutex_unlock (&thr->ts.work_share->lock);
 487   return ret;
 488 }
 489
 490 /* The *_doacross_*_start routines are similar.  The only difference is that
 491    this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
 492    section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
 493    and other COUNTS array elements tell the library number of iterations
 494    in the ordered inner loops.  */
 495
 496 static bool
 497 gomp_loop_doacross_static_start (unsigned ncounts, long *counts,
 498                                  long chunk_size, long *istart, long *iend)
 499 {
 500   struct gomp_thread *thr = gomp_thread ();
 501
 502   thr->ts.static_trip = 0;
 503   if (gomp_work_share_start (0))
 504     {
 505       gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
 506                       GFS_STATIC, chunk_size);
 507       gomp_doacross_init (ncounts, counts, chunk_size, 0);
 508       gomp_work_share_init_done ();
 509     }
 510
 511   return !gomp_iter_static_next (istart, iend);
 512 }
 513
 514 static bool
 515 gomp_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
 516                                   long chunk_size, long *istart, long *iend)
 517 {
 518   struct gomp_thread *thr = gomp_thread ();
 519   bool ret;
 520
 521   if (gomp_work_share_start (0))
 522     {
 523       gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
 524                       GFS_DYNAMIC, chunk_size);
 525       gomp_doacross_init (ncounts, counts, chunk_size, 0);
 526       gomp_work_share_init_done ();
 527     }
 528
 529 #ifdef HAVE_SYNC_BUILTINS
 530   ret = gomp_iter_dynamic_next (istart, iend);
 531 #else
 532   gomp_mutex_lock (&thr->ts.work_share->lock);
 533   ret = gomp_iter_dynamic_next_locked (istart, iend);
 534   gomp_mutex_unlock (&thr->ts.work_share->lock);
 535 #endif
 536
 537   return ret;
 538 }
 539
 540 static bool
 541 gomp_loop_doacross_guided_start (unsigned ncounts, long *counts,
 542                                  long chunk_size, long *istart, long *iend)
 543 {
 544   struct gomp_thread *thr = gomp_thread ();
 545   bool ret;
 546
 547   if (gomp_work_share_start (0))
 548     {
 549       gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
 550                       GFS_GUIDED, chunk_size);
 551       gomp_doacross_init (ncounts, counts, chunk_size, 0);
 552       gomp_work_share_init_done ();
 553     }
 554
 555 #ifdef HAVE_SYNC_BUILTINS
 556   ret = gomp_iter_guided_next (istart, iend);
 557 #else
 558   gomp_mutex_lock (&thr->ts.work_share->lock);
 559   ret = gomp_iter_guided_next_locked (istart, iend);
 560   gomp_mutex_unlock (&thr->ts.work_share->lock);
 561 #endif
 562
 563   return ret;
 564 }
 565
 566 bool
 567 GOMP_loop_doacross_runtime_start (unsigned ncounts, long *counts,
 568                                   long *istart, long *iend)
 569 {
 570   struct gomp_task_icv *icv = gomp_icv (false);
 571   switch (icv->run_sched_var & ~GFS_MONOTONIC)
 572     {
 573     case GFS_STATIC:
 574       return gomp_loop_doacross_static_start (ncounts, counts,
 575                                               icv->run_sched_chunk_size,
 576                                               istart, iend);
 577     case GFS_DYNAMIC:
 578       return gomp_loop_doacross_dynamic_start (ncounts, counts,
 579                                                icv->run_sched_chunk_size,
 580                                                istart, iend);
 581     case GFS_GUIDED:
 582       return gomp_loop_doacross_guided_start (ncounts, counts,
 583                                               icv->run_sched_chunk_size,
 584                                               istart, iend);
 585     case GFS_AUTO:
 586       /* For now map to schedule(static), later on we could play with feedback
 587          driven choice.  */
 588       return gomp_loop_doacross_static_start (ncounts, counts,
 589                                               0, istart, iend);
 590     default:
 591       abort ();
 592     }
 593 }
 594
 595 bool
 596 GOMP_loop_doacross_start (unsigned ncounts, long *counts, long sched,
 597                           long chunk_size, long *istart, long *iend,
 598                           uintptr_t *reductions, void **mem)
 599 {
 600   struct gomp_thread *thr = gomp_thread ();
 601
 602   thr->ts.static_trip = 0;
 603   if (reductions)
 604     gomp_workshare_taskgroup_start ();
 605   if (gomp_work_share_start (0))
 606     {
 607       size_t extra = 0;
 608       if (mem)
 609         extra = (uintptr_t) *mem;
 610       sched = gomp_adjust_sched (sched, &chunk_size);
 611       gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
 612                       sched, chunk_size);
 613       gomp_doacross_init (ncounts, counts, chunk_size, extra);
 614       if (reductions)
 615         {
 616           GOMP_taskgroup_reduction_register (reductions);
 617           thr->task->taskgroup->workshare = true;
 618           thr->ts.work_share->task_reductions = reductions;
 619         }
 620       gomp_work_share_init_done ();
 621     }
 622   else
 623     {
 624       if (reductions)
 625         {
 626           uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
 627           gomp_workshare_task_reduction_register (reductions,
 628                                                   first_reductions);
 629         }
 630       sched = thr->ts.work_share->sched;
 631     }
 632
 633   if (mem)
 634     *mem = thr->ts.work_share->doacross->extra;
 635
 636   return ialias_call (GOMP_loop_runtime_next) (istart, iend);
 637 }
 638
 639 /* The *_next routines are called when the thread completes processing of
 640    the iteration block currently assigned to it.  If the work-share
 641    construct is bound directly to a parallel construct, then the iteration
 642    bounds may have been set up before the parallel.  In which case, this
 643    may be the first iteration for the thread.
 644
 645    Returns true if there is work remaining to be performed; *ISTART and
 646    *IEND are filled with a new iteration block.  Returns false if all work
 647    has been assigned.  */
 648
 /* Fetch the next block of a statically scheduled loop into *ISTART and
    *IEND.  Returns true when gomp_iter_static_next reports 0.  */
 static bool
 gomp_loop_static_next (long *istart, long *iend)
 {
   int status = gomp_iter_static_next (istart, iend);

   return status == 0;
 }
 654
 655 static bool
 656 gomp_loop_dynamic_next (long *istart, long *iend)
 657 {
 658   bool ret;
 659
 660 #ifdef HAVE_SYNC_BUILTINS
 661   ret = gomp_iter_dynamic_next (istart, iend);
 662 #else
 663   struct gomp_thread *thr = gomp_thread ();
 664   gomp_mutex_lock (&thr->ts.work_share->lock);
 665   ret = gomp_iter_dynamic_next_locked (istart, iend);
 666   gomp_mutex_unlock (&thr->ts.work_share->lock);
 667 #endif
 668
 669   return ret;
 670 }
 671
 672 static bool
 673 gomp_loop_guided_next (long *istart, long *iend)
 674 {
 675   bool ret;
 676
 677 #ifdef HAVE_SYNC_BUILTINS
 678   ret = gomp_iter_guided_next (istart, iend);
 679 #else
 680   struct gomp_thread *thr = gomp_thread ();
 681   gomp_mutex_lock (&thr->ts.work_share->lock);
 682   ret = gomp_iter_guided_next_locked (istart, iend);
 683   gomp_mutex_unlock (&thr->ts.work_share->lock);
 684 #endif
 685
 686   return ret;
 687 }
 688
 689 bool
 690 GOMP_loop_runtime_next (long *istart, long *iend)
 691 {
 692   struct gomp_thread *thr = gomp_thread ();
 693
 694   switch (thr->ts.work_share->sched)
 695     {
 696     case GFS_STATIC:
 697     case GFS_AUTO:
 698       return gomp_loop_static_next (istart, iend);
 699     case GFS_DYNAMIC:
 700       return gomp_loop_dynamic_next (istart, iend);
 701     case GFS_GUIDED:
 702       return gomp_loop_guided_next (istart, iend);
 703     default:
 704       abort ();
 705     }
 706 }
 707
 708 /* The *_ordered_*_next routines are called when the thread completes
 709    processing of the iteration block currently assigned to it.
 710
 711    Returns true if there is work remaining to be performed; *ISTART and
 712    *IEND are filled with a new iteration block.  Returns false if all work
 713    has been assigned.  */
 714
 715 static bool
 716 gomp_loop_ordered_static_next (long *istart, long *iend)
 717 {
 718   struct gomp_thread *thr = gomp_thread ();
 719   int test;
 720
 721   gomp_ordered_sync ();
 722   gomp_mutex_lock (&thr->ts.work_share->lock);
 723   test = gomp_iter_static_next (istart, iend);
 724   if (test >= 0)
 725     gomp_ordered_static_next ();
 726   gomp_mutex_unlock (&thr->ts.work_share->lock);
 727
 728   return test == 0;
 729 }
 730
 731 static bool
 732 gomp_loop_ordered_dynamic_next (long *istart, long *iend)
 733 {
 734   struct gomp_thread *thr = gomp_thread ();
 735   bool ret;
 736
 737   gomp_ordered_sync ();
 738   gomp_mutex_lock (&thr->ts.work_share->lock);
 739   ret = gomp_iter_dynamic_next_locked (istart, iend);
 740   if (ret)
 741     gomp_ordered_next ();
 742   else
 743     gomp_ordered_last ();
 744   gomp_mutex_unlock (&thr->ts.work_share->lock);
 745
 746   return ret;
 747 }
 748
 749 static bool
 750 gomp_loop_ordered_guided_next (long *istart, long *iend)
 751 {
 752   struct gomp_thread *thr = gomp_thread ();
 753   bool ret;
 754
 755   gomp_ordered_sync ();
 756   gomp_mutex_lock (&thr->ts.work_share->lock);
 757   ret = gomp_iter_guided_next_locked (istart, iend);
 758   if (ret)
 759     gomp_ordered_next ();
 760   else
 761     gomp_ordered_last ();
 762   gomp_mutex_unlock (&thr->ts.work_share->lock);
 763
 764   return ret;
 765 }
 766
 767 bool
 768 GOMP_loop_ordered_runtime_next (long *istart, long *iend)
 769 {
 770   struct gomp_thread *thr = gomp_thread ();
 771
 772   switch (thr->ts.work_share->sched)
 773     {
 774     case GFS_STATIC:
 775     case GFS_AUTO:
 776       return gomp_loop_ordered_static_next (istart, iend);
 777     case GFS_DYNAMIC:
 778       return gomp_loop_ordered_dynamic_next (istart, iend);
 779     case GFS_GUIDED:
 780       return gomp_loop_ordered_guided_next (istart, iend);
 781     default:
 782       abort ();
 783     }
 784 }
 785
 786 /* The GOMP_parallel_loop_* routines pre-initialize a work-share construct
 787    to avoid one synchronization once we get into the loop.  */
 788
 789 static void
 790 gomp_parallel_loop_start (void (*fn) (void *), void *data,
 791                           unsigned num_threads, long start, long end,
 792                           long incr, enum gomp_schedule_type sched,
 793                           long chunk_size, unsigned int flags)
 794 {
 795   struct gomp_team *team;
 796
 797   num_threads = gomp_resolve_num_threads (num_threads, 0);
 798   team = gomp_new_team (num_threads);
 799   gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
 800   gomp_team_start (fn, data, num_threads, flags, team, NULL);
 801 }
 802
 803 void
 804 GOMP_parallel_loop_static_start (void (*fn) (void *), void *data,
 805                                  unsigned num_threads, long start, long end,
 806                                  long incr, long chunk_size)
 807 {
 808   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 809                             GFS_STATIC, chunk_size, 0);
 810 }
 811
 812 void
 813 GOMP_parallel_loop_dynamic_start (void (*fn) (void *), void *data,
 814                                   unsigned num_threads, long start, long end,
 815                                   long incr, long chunk_size)
 816 {
 817   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 818                             GFS_DYNAMIC, chunk_size, 0);
 819 }
 820
 821 void
 822 GOMP_parallel_loop_guided_start (void (*fn) (void *), void *data,
 823                                  unsigned num_threads, long start, long end,
 824                                  long incr, long chunk_size)
 825 {
 826   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 827                             GFS_GUIDED, chunk_size, 0);
 828 }
 829
 830 void
 831 GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data,
 832                                   unsigned num_threads, long start, long end,
 833                                   long incr)
 834 {
 835   struct gomp_task_icv *icv = gomp_icv (false);
 836   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 837                             icv->run_sched_var & ~GFS_MONOTONIC,
 838                             icv->run_sched_chunk_size, 0);
 839 }
 840
 /* ialias_redirect is a macro defined outside this file.  GOMP_parallel_end
    is called directly by the GOMP_parallel_loop_* entry points that
    follow.  NOTE(review): presumably this makes those calls bind to the
    library-internal symbol -- confirm against libgomp.h.  */
 841 ialias_redirect (GOMP_parallel_end)
 842
 843 void
 844 GOMP_parallel_loop_static (void (*fn) (void *), void *data,
 845                            unsigned num_threads, long start, long end,
 846                            long incr, long chunk_size, unsigned flags)
 847 {
 848   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 849                             GFS_STATIC, chunk_size, flags);
 850   fn (data);
 851   GOMP_parallel_end ();
 852 }
 853
 854 void
 855 GOMP_parallel_loop_dynamic (void (*fn) (void *), void *data,
 856                             unsigned num_threads, long start, long end,
 857                             long incr, long chunk_size, unsigned flags)
 858 {
 859   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 860                             GFS_DYNAMIC, chunk_size, flags);
 861   fn (data);
 862   GOMP_parallel_end ();
 863 }
 864
 865 void
 866 GOMP_parallel_loop_guided (void (*fn) (void *), void *data,
 867                           unsigned num_threads, long start, long end,
 868                           long incr, long chunk_size, unsigned flags)
 869 {
 870   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 871                             GFS_GUIDED, chunk_size, flags);
 872   fn (data);
 873   GOMP_parallel_end ();
 874 }
 875
 876 void
 877 GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
 878                             unsigned num_threads, long start, long end,
 879                             long incr, unsigned flags)
 880 {
 881   struct gomp_task_icv *icv = gomp_icv (false);
 882   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 883                             icv->run_sched_var & ~GFS_MONOTONIC,
 884                             icv->run_sched_chunk_size, flags);
 885   fn (data);
 886   GOMP_parallel_end ();
 887 }
 888
 889 #ifdef HAVE_ATTRIBUTE_ALIAS
 890 extern __typeof(GOMP_parallel_loop_dynamic) GOMP_parallel_loop_nonmonotonic_dynamic
 891         __attribute__((alias ("GOMP_parallel_loop_dynamic")));
 892 extern __typeof(GOMP_parallel_loop_guided) GOMP_parallel_loop_nonmonotonic_guided
 893         __attribute__((alias ("GOMP_parallel_loop_guided")));
 894 extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_nonmonotonic_runtime
 895         __attribute__((alias ("GOMP_parallel_loop_runtime")));
 896 extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_maybe_nonmonotonic_runtime
 897         __attribute__((alias ("GOMP_parallel_loop_runtime")));
 898 #else
 899 void
 900 GOMP_parallel_loop_nonmonotonic_dynamic (void (*fn) (void *), void *data,
 901                                          unsigned num_threads, long start,
 902                                          long end, long incr, long chunk_size,
 903                                          unsigned flags)
 904 {
 905   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 906                             GFS_DYNAMIC, chunk_size, flags);
 907   fn (data);
 908   GOMP_parallel_end ();
 909 }
 910
 911 void
 912 GOMP_parallel_loop_nonmonotonic_guided (void (*fn) (void *), void *data,
 913                                         unsigned num_threads, long start,
 914                                         long end, long incr, long chunk_size,
 915                                         unsigned flags)
 916 {
 917   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 918                             GFS_GUIDED, chunk_size, flags);
 919   fn (data);
 920   GOMP_parallel_end ();
 921 }
 922
 923 void
 924 GOMP_parallel_loop_nonmonotonic_runtime (void (*fn) (void *), void *data,
 925                                          unsigned num_threads, long start,
 926                                          long end, long incr, unsigned flags)
 927 {
 928   struct gomp_task_icv *icv = gomp_icv (false);
 929   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 930                             icv->run_sched_var & ~GFS_MONOTONIC,
 931                             icv->run_sched_chunk_size, flags);
 932   fn (data);
 933   GOMP_parallel_end ();
 934 }
 935
 936 void
 937 GOMP_parallel_loop_maybe_nonmonotonic_runtime (void (*fn) (void *), void *data,
 938                                                unsigned num_threads, long start,
 939                                                long end, long incr,
 940                                                unsigned flags)
 941 {
 942   struct gomp_task_icv *icv = gomp_icv (false);
 943   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 944                             icv->run_sched_var & ~GFS_MONOTONIC,
 945                             icv->run_sched_chunk_size, flags);
 946   fn (data);
 947   GOMP_parallel_end ();
 948 }
 949 #endif
 950
 951 /* The GOMP_loop_end* routines are called after the thread is told that
 952    all loop iterations are complete.  The first two versions synchronize
 953    all threads; the nowait version does not.  */
 954
 955 void
 956 GOMP_loop_end (void)
 957 {
 958   gomp_work_share_end ();
 959 }
 960
 961 bool
 962 GOMP_loop_end_cancel (void)
 963 {
 964   return gomp_work_share_end_cancel ();
 965 }
 966
 967 void
 968 GOMP_loop_end_nowait (void)
 969 {
 970   gomp_work_share_end_nowait ();
 971 }
 972
 973
 974 /* We use static functions above so that we're sure that the "runtime"
 975    function can defer to the proper routine without interposition.  We
 976    export the static function with a strong alias when possible, or with
 977    a wrapper function otherwise.  */
 978
 979 #ifdef HAVE_ATTRIBUTE_ALIAS
 980 extern __typeof(gomp_loop_static_start) GOMP_loop_static_start
 981         __attribute__((alias ("gomp_loop_static_start")));
 982 extern __typeof(gomp_loop_dynamic_start) GOMP_loop_dynamic_start
 983         __attribute__((alias ("gomp_loop_dynamic_start")));
 984 extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start
 985         __attribute__((alias ("gomp_loop_guided_start")));
 986 extern __typeof(gomp_loop_dynamic_start) GOMP_loop_nonmonotonic_dynamic_start
 987         __attribute__((alias ("gomp_loop_dynamic_start")));
 988 extern __typeof(gomp_loop_guided_start) GOMP_loop_nonmonotonic_guided_start
 989         __attribute__((alias ("gomp_loop_guided_start")));
 990 extern __typeof(GOMP_loop_runtime_start) GOMP_loop_nonmonotonic_runtime_start
 991         __attribute__((alias ("GOMP_loop_runtime_start")));
 992 extern __typeof(GOMP_loop_runtime_start) GOMP_loop_maybe_nonmonotonic_runtime_start
 993         __attribute__((alias ("GOMP_loop_runtime_start")));
 994
 995 extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start
 996         __attribute__((alias ("gomp_loop_ordered_static_start")));
 997 extern __typeof(gomp_loop_ordered_dynamic_start) GOMP_loop_ordered_dynamic_start
 998         __attribute__((alias ("gomp_loop_ordered_dynamic_start")));
 999 extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start
1000         __attribute__((alias ("gomp_loop_ordered_guided_start")));
1001
1002 extern __typeof(gomp_loop_doacross_static_start) GOMP_loop_doacross_static_start
1003         __attribute__((alias ("gomp_loop_doacross_static_start")));
1004 extern __typeof(gomp_loop_doacross_dynamic_start) GOMP_loop_doacross_dynamic_start
1005         __attribute__((alias ("gomp_loop_doacross_dynamic_start")));
1006 extern __typeof(gomp_loop_doacross_guided_start) GOMP_loop_doacross_guided_start
1007         __attribute__((alias ("gomp_loop_doacross_guided_start")));
1008
1009 extern __typeof(gomp_loop_static_next) GOMP_loop_static_next
1010         __attribute__((alias ("gomp_loop_static_next")));
1011 extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next
1012         __attribute__((alias ("gomp_loop_dynamic_next")));
1013 extern __typeof(gomp_loop_guided_next) GOMP_loop_guided_next
1014         __attribute__((alias ("gomp_loop_guided_next")));
1015 extern __typeof(gomp_loop_dynamic_next) GOMP_loop_nonmonotonic_dynamic_next
1016         __attribute__((alias ("gomp_loop_dynamic_next")));
1017 extern __typeof(gomp_loop_guided_next) GOMP_loop_nonmonotonic_guided_next
1018         __attribute__((alias ("gomp_loop_guided_next")));
1019 extern __typeof(GOMP_loop_runtime_next) GOMP_loop_nonmonotonic_runtime_next
1020         __attribute__((alias ("GOMP_loop_runtime_next")));
1021 extern __typeof(GOMP_loop_runtime_next) GOMP_loop_maybe_nonmonotonic_runtime_next
1022         __attribute__((alias ("GOMP_loop_runtime_next")));
1023
1024 extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next
1025         __attribute__((alias ("gomp_loop_ordered_static_next")));
1026 extern __typeof(gomp_loop_ordered_dynamic_next) GOMP_loop_ordered_dynamic_next
1027         __attribute__((alias ("gomp_loop_ordered_dynamic_next")));
1028 extern __typeof(gomp_loop_ordered_guided_next) GOMP_loop_ordered_guided_next
1029         __attribute__((alias ("gomp_loop_ordered_guided_next")));
1030 #else
/* Wrapper used when HAVE_ATTRIBUTE_ALIAS is not defined: passes every
   argument through to gomp_loop_static_start and returns its result.  */

bool
GOMP_loop_static_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  bool result = gomp_loop_static_start (start, end, incr, chunk_size,
                                        istart, iend);

  return result;
}
1037
/* Wrapper used when HAVE_ATTRIBUTE_ALIAS is not defined: passes every
   argument through to gomp_loop_dynamic_start and returns its result.  */

bool
GOMP_loop_dynamic_start (long start, long end, long incr, long chunk_size,
                         long *istart, long *iend)
{
  bool result = gomp_loop_dynamic_start (start, end, incr, chunk_size,
                                         istart, iend);

  return result;
}
1044
/* Wrapper used when HAVE_ATTRIBUTE_ALIAS is not defined: passes every
   argument through to gomp_loop_guided_start and returns its result.  */

bool
GOMP_loop_guided_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  bool result = gomp_loop_guided_start (start, end, incr, chunk_size,
                                        istart, iend);

  return result;
}
1051
/* Wrapper used when HAVE_ATTRIBUTE_ALIAS is not defined.  The
   nonmonotonic dynamic start shares the implementation of the plain
   dynamic start: all arguments go to gomp_loop_dynamic_start and its
   result is returned.  */

bool
GOMP_loop_nonmonotonic_dynamic_start (long start, long end, long incr,
                                      long chunk_size, long *istart,
                                      long *iend)
{
  bool result = gomp_loop_dynamic_start (start, end, incr, chunk_size,
                                         istart, iend);

  return result;
}
1059
/* Wrapper used when HAVE_ATTRIBUTE_ALIAS is not defined.  The
   nonmonotonic guided start shares the implementation of the plain
   guided start: all arguments go to gomp_loop_guided_start and its
   result is returned.  */

bool
GOMP_loop_nonmonotonic_guided_start (long start, long end, long incr,
                                     long chunk_size, long *istart, long *iend)
{
  bool result = gomp_loop_guided_start (start, end, incr, chunk_size,
                                        istart, iend);

  return result;
}
1066
/* Wrapper used when HAVE_ATTRIBUTE_ALIAS is not defined: passes every
   argument through to GOMP_loop_runtime_start and returns its result.  */

bool
GOMP_loop_nonmonotonic_runtime_start (long start, long end, long incr,
                                      long *istart, long *iend)
{
  bool result = GOMP_loop_runtime_start (start, end, incr, istart, iend);

  return result;
}
1073
/* Wrapper used when HAVE_ATTRIBUTE_ALIAS is not defined: passes every
   argument through to GOMP_loop_runtime_start and returns its result.  */

bool
GOMP_loop_maybe_nonmonotonic_runtime_start (long start, long end, long incr,
                                            long *istart, long *iend)
{
  bool result = GOMP_loop_runtime_start (start, end, incr, istart, iend);

  return result;
}
1080
/* Wrapper used when HAVE_ATTRIBUTE_ALIAS is not defined: passes every
   argument through to gomp_loop_ordered_static_start and returns its
   result.  */

bool
GOMP_loop_ordered_static_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  bool result = gomp_loop_ordered_static_start (start, end, incr,
                                                chunk_size, istart, iend);

  return result;
}
1088
/* Wrapper used when HAVE_ATTRIBUTE_ALIAS is not defined: passes every
   argument through to gomp_loop_ordered_dynamic_start and returns its
   result.  */

bool
GOMP_loop_ordered_dynamic_start (long start, long end, long incr,
                                 long chunk_size, long *istart, long *iend)
{
  bool result = gomp_loop_ordered_dynamic_start (start, end, incr,
                                                 chunk_size, istart, iend);

  return result;
}
1096
/* Wrapper used when HAVE_ATTRIBUTE_ALIAS is not defined: passes every
   argument through to gomp_loop_ordered_guided_start and returns its
   result.  */

bool
GOMP_loop_ordered_guided_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  bool result = gomp_loop_ordered_guided_start (start, end, incr,
                                                chunk_size, istart, iend);

  return result;
}
1104
/* Wrapper used when HAVE_ATTRIBUTE_ALIAS is not defined: passes NCOUNTS,
   COUNTS, CHUNK_SIZE, ISTART and IEND through to
   gomp_loop_doacross_static_start and returns its result.  */

bool
GOMP_loop_doacross_static_start (unsigned ncounts, long *counts,
                                 long chunk_size, long *istart, long *iend)
{
  bool result = gomp_loop_doacross_static_start (ncounts, counts,
                                                 chunk_size, istart, iend);

  return result;
}
1112
/* Wrapper used when HAVE_ATTRIBUTE_ALIAS is not defined: passes NCOUNTS,
   COUNTS, CHUNK_SIZE, ISTART and IEND through to
   gomp_loop_doacross_dynamic_start and returns its result.  */

bool
GOMP_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
                                  long chunk_size, long *istart, long *iend)
{
  bool result = gomp_loop_doacross_dynamic_start (ncounts, counts,
                                                  chunk_size, istart, iend);

  return result;
}
1120
/* Wrapper used when HAVE_ATTRIBUTE_ALIAS is not defined: passes NCOUNTS,
   COUNTS, CHUNK_SIZE, ISTART and IEND through to
   gomp_loop_doacross_guided_start and returns its result.  */

bool
GOMP_loop_doacross_guided_start (unsigned ncounts, long *counts,
                                 long chunk_size, long *istart, long *iend)
{
  bool result = gomp_loop_doacross_guided_start (ncounts, counts,
                                                 chunk_size, istart, iend);

  return result;
}
1128
/* Wrapper used when HAVE_ATTRIBUTE_ALIAS is not defined: passes ISTART
   and IEND through to gomp_loop_static_next and returns its result.  */

bool
GOMP_loop_static_next (long *istart, long *iend)
{
  bool result = gomp_loop_static_next (istart, iend);

  return result;
}
1134
/* Wrapper used when HAVE_ATTRIBUTE_ALIAS is not defined: passes ISTART
   and IEND through to gomp_loop_dynamic_next and returns its result.  */

bool
GOMP_loop_dynamic_next (long *istart, long *iend)
{
  bool result = gomp_loop_dynamic_next (istart, iend);

  return result;
}
1140
/* Wrapper used when HAVE_ATTRIBUTE_ALIAS is not defined: passes ISTART
   and IEND through to gomp_loop_guided_next and returns its result.  */

bool
GOMP_loop_guided_next (long *istart, long *iend)
{
  bool result = gomp_loop_guided_next (istart, iend);

  return result;
}
1146
/* Wrapper used when HAVE_ATTRIBUTE_ALIAS is not defined.  The
   nonmonotonic dynamic "next" shares the implementation of the plain
   dynamic one: ISTART and IEND go to gomp_loop_dynamic_next and its
   result is returned.  */

bool
GOMP_loop_nonmonotonic_dynamic_next (long *istart, long *iend)
{
  bool result = gomp_loop_dynamic_next (istart, iend);

  return result;
}
1152
/* Wrapper used when HAVE_ATTRIBUTE_ALIAS is not defined.  The
   nonmonotonic guided "next" shares the implementation of the plain
   guided one: ISTART and IEND go to gomp_loop_guided_next and its
   result is returned.  */

bool
GOMP_loop_nonmonotonic_guided_next (long *istart, long *iend)
{
  bool result = gomp_loop_guided_next (istart, iend);

  return result;
}
1158
/* Wrapper used when HAVE_ATTRIBUTE_ALIAS is not defined: passes ISTART
   and IEND through to GOMP_loop_runtime_next and returns its result.  */

bool
GOMP_loop_nonmonotonic_runtime_next (long *istart, long *iend)
{
  bool result = GOMP_loop_runtime_next (istart, iend);

  return result;
}
1164
/* Wrapper used when HAVE_ATTRIBUTE_ALIAS is not defined: passes ISTART
   and IEND through to GOMP_loop_runtime_next and returns its result.  */

bool
GOMP_loop_maybe_nonmonotonic_runtime_next (long *istart, long *iend)
{
  bool result = GOMP_loop_runtime_next (istart, iend);

  return result;
}
1170
/* Wrapper used when HAVE_ATTRIBUTE_ALIAS is not defined: passes ISTART
   and IEND through to gomp_loop_ordered_static_next and returns its
   result.  */

bool
GOMP_loop_ordered_static_next (long *istart, long *iend)
{
  bool result = gomp_loop_ordered_static_next (istart, iend);

  return result;
}
1176
/* Wrapper used when HAVE_ATTRIBUTE_ALIAS is not defined: passes ISTART
   and IEND through to gomp_loop_ordered_dynamic_next and returns its
   result.  */

bool
GOMP_loop_ordered_dynamic_next (long *istart, long *iend)
{
  bool result = gomp_loop_ordered_dynamic_next (istart, iend);

  return result;
}
1182
/* Wrapper used when HAVE_ATTRIBUTE_ALIAS is not defined: passes ISTART
   and IEND through to gomp_loop_ordered_guided_next and returns its
   result.  */

bool
GOMP_loop_ordered_guided_next (long *istart, long *iend)
{
  bool result = gomp_loop_ordered_guided_next (istart, iend);

  return result;
}
1188 #endif