/* OpenACC parallelism dimensions clauses: num_gangs, num_workers,
   vector_length.  */
4 /* { dg-additional-options "-DEXPENSIVE" { target run_expensive_tests } } */
6 /* { dg-additional-options "--param=openacc-kernels=decompose" } */
8 /* { dg-additional-options "-fopt-info-all-omp" }
9 { dg-additional-options "-foffload=-fopt-info-all-omp" } */
11 /* { dg-additional-options "--param=openacc-privatization=noisy" }
12 { dg-additional-options "-foffload=--param=openacc-privatization=noisy" }
13 Prune a few: uninteresting, and potentially varying depending on GCC configuration (data types):
14 { dg-prune-output {note: variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} } */
16 /* It's only with Tcl 8.5 (released in 2007) that "the variable 'varName'
17 passed to 'incr' may be unset, and in that case, it will be set to [...]",
18 so to maintain compatibility with earlier Tcl releases, we manually
19 initialize counter variables:
20 { dg-line l_dummy[variable c_compute 0 c_loop_i 0 c_loop_j 0 c_loop_k 0] }
21 { dg-message dummy {} { target iN-VAl-Id } l_dummy } to avoid
22 "WARNING: dg-line var l_dummy defined, but not used". */
24 /* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting
25 aspects of that functionality. */
27 /* See also '../libgomp.oacc-fortran/parallel-dims.f90'. */
31 #include <gomp-constants.h>
33 #pragma acc routine seq
34 inline __attribute__ ((always_inline
))
35 static int acc_gang ()
37 return __builtin_goacc_parlevel_id (GOMP_DIM_GANG
);
40 #pragma acc routine seq
41 inline __attribute__ ((always_inline
))
42 static int acc_worker ()
44 return __builtin_goacc_parlevel_id (GOMP_DIM_WORKER
);
47 #pragma acc routine seq
48 inline __attribute__ ((always_inline
))
49 static int acc_vector ()
51 return __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR
);
62 acc_init (acc_device_default
);
64 /* OpenACC parallel construct. */
66 /* Non-positive value. */
71 /* { dg-warning {'num_gangs' value must be positive} {} { target c } .-1 } */
72 int gangs_actual
= GANGS
;
73 int gangs_min
, gangs_max
, workers_min
, workers_max
, vectors_min
, vectors_max
;
74 gangs_min
= workers_min
= vectors_min
= INT_MAX
;
75 gangs_max
= workers_max
= vectors_max
= INT_MIN
;
76 #pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
78 reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) \
80 /* { dg-note {in expansion of macro 'GANGS'} {} { target c } .-1 } */
81 /* { dg-warning {'num_gangs' value must be positive} {} { target c++ } .-2 } */
82 /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
84 /* We're actually executing with num_gangs (1). */
86 for (int i
= N
* gangs_actual
; i
> -N
* gangs_actual
; --i
)
88 gangs_min
= gangs_max
= acc_gang ();
89 workers_min
= workers_max
= acc_worker ();
90 vectors_min
= vectors_max
= acc_vector ();
93 if (gangs_actual
!= 1)
95 if (gangs_min
!= 0 || gangs_max
!= gangs_actual
- 1
96 || workers_min
!= 0 || workers_max
!= 0
97 || vectors_min
!= 0 || vectors_max
!= 0)
105 /* { dg-warning {'num_gangs' value must be positive} {} { target c } .-1 } */
106 int gangs_actual
= GANGS
;
107 int gangs_min
, gangs_max
, workers_min
, workers_max
, vectors_min
, vectors_max
;
108 gangs_min
= workers_min
= vectors_min
= INT_MAX
;
109 gangs_max
= workers_max
= vectors_max
= INT_MIN
;
110 #pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
111 copy (gangs_actual) \
113 /* { dg-note {in expansion of macro 'GANGS'} {} { target c } .-1 } */
114 /* { dg-warning {'num_gangs' value must be positive} {} { target c++ } .-2 } */
115 /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
116 /* { dg-warning {region contains gang partitioned code but is not gang partitioned} {} { target *-*-* } l_compute$c_compute } */
118 /* We're actually executing with num_gangs (1). */
120 #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
122 reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
123 /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
124 /* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
125 for (int i
= N
* gangs_actual
; i
> -N
* gangs_actual
; --i
)
127 gangs_min
= gangs_max
= acc_gang ();
128 workers_min
= workers_max
= acc_worker ();
129 vectors_min
= vectors_max
= acc_vector ();
132 if (gangs_actual
!= 1)
134 if (gangs_min
!= 0 || gangs_max
!= gangs_actual
- 1
135 || workers_min
!= 0 || workers_max
!= 0
136 || vectors_min
!= 0 || vectors_max
!= 0)
144 /* { dg-warning {'num_workers' value must be positive} {} { target c } .-1 } */
145 int workers_actual
= WORKERS
;
146 int gangs_min
, gangs_max
, workers_min
, workers_max
, vectors_min
, vectors_max
;
147 gangs_min
= workers_min
= vectors_min
= INT_MAX
;
148 gangs_max
= workers_max
= vectors_max
= INT_MIN
;
149 #pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
150 copy (workers_actual) \
151 num_workers (WORKERS)
152 /* { dg-note {in expansion of macro 'WORKERS'} {} { target c } .-1 } */
153 /* { dg-warning {'num_workers' value must be positive} {} { target c++ } .-2 } */
154 /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
155 /* { dg-warning {region contains worker partitioned code but is not worker partitioned} {} { target *-*-* } l_compute$c_compute } */
157 /* We're actually executing with num_workers (1). */
159 #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
161 reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
162 /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
163 /* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
164 for (int i
= N
* workers_actual
; i
> -N
* workers_actual
; --i
)
166 gangs_min
= gangs_max
= acc_gang ();
167 workers_min
= workers_max
= acc_worker ();
168 vectors_min
= vectors_max
= acc_vector ();
171 if (workers_actual
!= 1)
173 if (gangs_min
!= 0 || gangs_max
!= 0
174 || workers_min
!= 0 || workers_max
!= workers_actual
- 1
175 || vectors_min
!= 0 || vectors_max
!= 0)
183 /* { dg-warning {'vector_length' value must be positive} {} { target c } .-1 } */
184 int vectors_actual
= VECTORS
;
185 int gangs_min
, gangs_max
, workers_min
, workers_max
, vectors_min
, vectors_max
;
186 gangs_min
= workers_min
= vectors_min
= INT_MAX
;
187 gangs_max
= workers_max
= vectors_max
= INT_MIN
;
188 #pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
189 copy (vectors_actual) \
190 vector_length (VECTORS)
191 /* { dg-note {in expansion of macro 'VECTORS'} {} { target c } .-1 } */
192 /* { dg-warning {'vector_length' value must be positive} {} { target c++ } .-2 } */
193 /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
194 /* { dg-warning {region contains vector partitioned code but is not vector partitioned} {} { target *-*-* } l_compute$c_compute } */
195 /* { dg-warning {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
197 /* We're actually executing with vector_length (1), just the GCC nvptx
198 back end enforces vector_length (32). */
199 if (acc_on_device (acc_device_nvidia
))
200 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
201 ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
205 #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
207 reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
208 /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
209 /* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
210 for (int i
= N
* vectors_actual
; i
> -N
* vectors_actual
; --i
)
212 gangs_min
= gangs_max
= acc_gang ();
213 workers_min
= workers_max
= acc_worker ();
214 vectors_min
= vectors_max
= acc_vector ();
217 if (acc_get_device_type () == acc_device_nvidia
)
219 if (vectors_actual
!= 32)
223 if (vectors_actual
!= 1)
225 if (gangs_min
!= 0 || gangs_max
!= 0
226 || workers_min
!= 0 || workers_max
!= 0
227 || vectors_min
!= 0 || vectors_max
!= vectors_actual
- 1)
237 /* There is no actual limit for the number of gangs, so we try with a
238 rather high value. */
240 int gangs_actual
= gangs
;
241 int gangs_min
, gangs_max
, workers_min
, workers_max
, vectors_min
, vectors_max
;
242 gangs_min
= workers_min
= vectors_min
= INT_MAX
;
243 gangs_max
= workers_max
= vectors_max
= INT_MIN
;
244 #pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
245 copy (gangs_actual) \
246 reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) \
248 /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
249 /* { dg-bogus {warning: region is gang partitioned but does not contain gang partitioned code} {TODO 'reduction'} { xfail *-*-* } l_compute$c_compute } */
251 if (acc_on_device (acc_device_host
))
252 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
253 ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
255 /* We're actually executing with num_gangs (1). */
258 /* As we're executing GR not GP, don't multiply with a "gangs_actual"
260 for (int i
= N
/* * gangs_actual */; i
> -N
/* * gangs_actual */; --i
)
262 gangs_min
= gangs_max
= acc_gang ();
263 workers_min
= workers_max
= acc_worker ();
264 vectors_min
= vectors_max
= acc_vector ();
267 if (gangs_actual
< 1)
269 if (gangs_min
!= 0 || gangs_max
!= gangs_actual
- 1
270 || workers_min
!= 0 || workers_max
!= 0
271 || vectors_min
!= 0 || vectors_max
!= 0)
277 /* There is no actual limit for the number of gangs, so we try with a
278 rather high value. */
280 int gangs_actual
= gangs
;
281 int gangs_min
, gangs_max
, workers_min
, workers_max
, vectors_min
, vectors_max
;
282 gangs_min
= workers_min
= vectors_min
= INT_MAX
;
283 gangs_max
= workers_max
= vectors_max
= INT_MIN
;
284 #pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
285 copy (gangs_actual) \
287 /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
289 if (acc_on_device (acc_device_host
))
290 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
291 ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
293 /* We're actually executing with num_gangs (1). */
296 #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
298 reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
299 /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
300 /* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
301 for (int i
= N
* gangs_actual
; i
> -N
* gangs_actual
; --i
)
303 gangs_min
= gangs_max
= acc_gang ();
304 workers_min
= workers_max
= acc_worker ();
305 vectors_min
= vectors_max
= acc_vector ();
308 if (gangs_actual
< 1)
310 if (gangs_min
!= 0 || gangs_max
!= gangs_actual
- 1
311 || workers_min
!= 0 || workers_max
!= 0
312 || vectors_min
!= 0 || vectors_max
!= 0)
318 /* We try with an outrageously large value. */
319 #define WORKERS 2 << 20
320 int workers_actual
= WORKERS
;
321 int gangs_min
, gangs_max
, workers_min
, workers_max
, vectors_min
, vectors_max
;
322 gangs_min
= workers_min
= vectors_min
= INT_MAX
;
323 gangs_max
= workers_max
= vectors_max
= INT_MIN
;
324 #pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
325 copy (workers_actual) \
326 num_workers (WORKERS)
327 /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
328 /* { dg-warning {using 'num_workers \(32\)', ignoring 2097152} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
330 if (acc_on_device (acc_device_host
))
331 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
332 ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
334 /* We're actually executing with num_workers (1). */
337 else if (acc_on_device (acc_device_nvidia
))
338 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
339 ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
341 /* The GCC nvptx back end enforces num_workers (32). */
344 else if (acc_on_device (acc_device_radeon
))
345 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
346 ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
348 /* The GCC GCN back end is limited to num_workers (16). */
353 #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
355 reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
356 /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
357 /* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
358 for (int i
= N
* workers_actual
; i
> -N
* workers_actual
; --i
)
360 gangs_min
= gangs_max
= acc_gang ();
361 workers_min
= workers_max
= acc_worker ();
362 vectors_min
= vectors_max
= acc_vector ();
365 if (workers_actual
< 1)
367 if (gangs_min
!= 0 || gangs_max
!= 0
368 || workers_min
!= 0 || workers_max
!= workers_actual
- 1
369 || vectors_min
!= 0 || vectors_max
!= 0)
376 /* We try with an outrageously large value. */
377 int workers
= 2 << 20;
378 /* For nvptx offloading, this one will not result in "using num_workers
379 (32), ignoring runtime setting", and will in fact try to launch with
380 "num_workers (workers)", which will run into "libgomp: cuLaunchKernel
381 error: invalid argument". So, limit ourselves here. */
382 if (acc_get_device_type () == acc_device_nvidia
)
384 int workers_actual
= workers
;
385 int gangs_min
, gangs_max
, workers_min
, workers_max
, vectors_min
, vectors_max
;
386 gangs_min
= workers_min
= vectors_min
= INT_MAX
;
387 gangs_max
= workers_max
= vectors_max
= INT_MIN
;
388 #pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
389 copy (workers_actual) \
390 num_workers (workers)
391 /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
393 if (acc_on_device (acc_device_host
))
394 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
395 ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
397 /* We're actually executing with num_workers (1). */
400 else if (acc_on_device (acc_device_nvidia
))
401 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
402 ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
404 /* We're actually executing with num_workers (32). */
405 /* workers_actual = 32; */
407 else if (acc_on_device (acc_device_radeon
))
408 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
409 ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
411 /* The GCC GCN back end is limited to num_workers (16). */
416 #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
418 reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
419 /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
420 /* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
421 for (int i
= N
* workers_actual
; i
> -N
* workers_actual
; --i
)
423 gangs_min
= gangs_max
= acc_gang ();
424 workers_min
= workers_max
= acc_worker ();
425 vectors_min
= vectors_max
= acc_vector ();
428 if (workers_actual
< 1)
430 if (gangs_min
!= 0 || gangs_max
!= 0
431 || workers_min
!= 0 || workers_max
!= workers_actual
- 1
432 || vectors_min
!= 0 || vectors_max
!= 0)
438 /* We try with an outrageously large value. */
439 #define VECTORS 2 << 20
440 int vectors_actual
= VECTORS
;
441 int gangs_min
, gangs_max
, workers_min
, workers_max
, vectors_min
, vectors_max
;
442 gangs_min
= workers_min
= vectors_min
= INT_MAX
;
443 gangs_max
= workers_max
= vectors_max
= INT_MIN
;
444 #pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
445 copy (vectors_actual) \
446 vector_length (VECTORS)
447 /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
448 /* { dg-warning {using 'vector_length \(1024\)', ignoring 2097152} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
450 if (acc_on_device (acc_device_host
))
451 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
452 ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
454 /* We're actually executing with vector_length (1). */
457 else if (acc_on_device (acc_device_nvidia
))
458 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
459 ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
461 /* The GCC nvptx back end reduces to vector_length (1024). */
462 vectors_actual
= 1024;
464 else if (acc_on_device (acc_device_radeon
))
465 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
466 ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
468 /* The GCC GCN back end enforces vector_length (1): autovectorize. */
473 #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
475 reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
476 /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
477 /* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
478 for (int i
= N
* vectors_actual
; i
> -N
* vectors_actual
; --i
)
480 gangs_min
= gangs_max
= acc_gang ();
481 workers_min
= workers_max
= acc_worker ();
482 vectors_min
= vectors_max
= acc_vector ();
485 if (vectors_actual
< 1)
487 if (gangs_min
!= 0 || gangs_max
!= 0
488 || workers_min
!= 0 || workers_max
!= 0
489 || vectors_min
!= 0 || vectors_max
!= vectors_actual
- 1)
496 /* We try with an outrageously large value. */
497 int vectors
= 2 << 20;
498 int vectors_actual
= vectors
;
499 int gangs_min
, gangs_max
, workers_min
, workers_max
, vectors_min
, vectors_max
;
500 gangs_min
= workers_min
= vectors_min
= INT_MAX
;
501 gangs_max
= workers_max
= vectors_max
= INT_MIN
;
502 #pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
503 copy (vectors_actual) \
504 vector_length (vectors)
505 /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
506 /* { dg-warning {using 'vector_length \(32\)', ignoring runtime setting} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
508 if (acc_on_device (acc_device_host
))
509 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
510 ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
512 /* We're actually executing with vector_length (1). */
515 else if (acc_on_device (acc_device_nvidia
))
516 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
517 ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
519 /* The GCC nvptx back end enforces vector_length (32). */
522 else if (acc_on_device (acc_device_radeon
))
523 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
524 ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
526 /* Because of the way vectors are implemented for GCN, a vector loop
527 containing a seq routine call will not vectorize calls to that
528 routine. Hence, we'll only get one "vector". */
533 #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
535 reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
536 /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
537 /* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
538 for (int i
= N
* vectors_actual
; i
> -N
* vectors_actual
; --i
)
540 gangs_min
= gangs_max
= acc_gang ();
541 workers_min
= workers_max
= acc_worker ();
542 vectors_min
= vectors_max
= acc_vector ();
545 if (vectors_actual
< 1)
547 if (gangs_min
!= 0 || gangs_max
!= 0
548 || workers_min
!= 0 || workers_max
!= 0
549 || vectors_min
!= 0 || vectors_max
!= vectors_actual
- 1)
554 /* Composition of GP, WP, VP. */
557 /* With nvptx offloading, multi-level reductions apparently are very slow
558 in the following case. So, limit ourselves here. */
559 if (acc_get_device_type () == acc_device_nvidia
)
561 /* Similar appears to be true for GCN. */
562 if (acc_get_device_type () == acc_device_radeon
)
564 int gangs_actual
= gangs
;
566 int workers_actual
= WORKERS
;
568 int vectors_actual
= VECTORS
;
569 int gangs_min
, gangs_max
, workers_min
, workers_max
, vectors_min
, vectors_max
;
570 gangs_min
= workers_min
= vectors_min
= INT_MAX
;
571 gangs_max
= workers_max
= vectors_max
= INT_MIN
;
572 #pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
573 copy (gangs_actual, workers_actual, vectors_actual) \
575 num_workers (WORKERS) \
576 vector_length (VECTORS)
577 /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
578 /* { dg-warning {using 'vector_length \(32\)', ignoring 11} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
580 if (acc_on_device (acc_device_host
))
581 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
582 ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
584 /* We're actually executing with num_gangs (1), num_workers (1),
585 vector_length (1). */
590 else if (acc_on_device (acc_device_nvidia
))
591 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
592 ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
594 /* The GCC nvptx back end enforces vector_length (32). */
597 else if (acc_on_device (acc_device_radeon
))
598 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
599 ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
601 /* See above comments about GCN vectors_actual. */
606 #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
608 reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
609 /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
610 /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
611 /* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
612 for (int i
= N
* gangs_actual
; i
> -N
* gangs_actual
; --i
)
613 #pragma acc loop /* { dg-line l_loop_j[incr c_loop_j] } */ \
615 reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
616 /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_j$c_loop_j } */
617 /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_j$c_loop_j } */
618 /* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_j$c_loop_j } */
619 for (int j
= N
* workers_actual
; j
> -N
* workers_actual
; --j
)
620 #pragma acc loop /* { dg-line l_loop_k[incr c_loop_k] } */ \
622 reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
623 /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_k$c_loop_k } */
624 /* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_k$c_loop_k } */
625 for (int k
= N
* vectors_actual
; k
> -N
* vectors_actual
; --k
)
627 gangs_min
= gangs_max
= acc_gang ();
628 workers_min
= workers_max
= acc_worker ();
629 vectors_min
= vectors_max
= acc_vector ();
632 if (gangs_min
!= 0 || gangs_max
!= gangs_actual
- 1
633 || workers_min
!= 0 || workers_max
!= workers_actual
- 1
634 || vectors_min
!= 0 || vectors_max
!= vectors_actual
- 1)
641 /* OpenACC kernels construct. */
643 /* We can't test parallelized OpenACC kernels constructs in this way: use of
644 the acc_gang, acc_worker, acc_vector functions will make the construct
648 /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
651 int gangs_min
, gangs_max
, workers_min
, workers_max
, vectors_min
, vectors_max
;
652 gangs_min
= workers_min
= vectors_min
= INT_MAX
;
653 gangs_max
= workers_max
= vectors_max
= INT_MIN
;
654 #pragma acc kernels /* { dg-line l_compute[incr c_compute] } */
655 /* { dg-note {OpenACC 'kernels' decomposition: variable 'vectors_max' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
656 { dg-note {variable 'vectors_max' made addressable} {} { target *-*-* } l_compute$c_compute } */
657 /* { dg-note {OpenACC 'kernels' decomposition: variable 'vectors_min' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
658 { dg-note {variable 'vectors_min' made addressable} {} { target *-*-* } l_compute$c_compute } */
659 /* { dg-note {OpenACC 'kernels' decomposition: variable 'workers_max' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
660 { dg-note {variable 'workers_max' made addressable} {} { target *-*-* } l_compute$c_compute } */
661 /* { dg-note {OpenACC 'kernels' decomposition: variable 'workers_min' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
662 { dg-note {variable 'workers_min' made addressable} {} { target *-*-* } l_compute$c_compute } */
663 /* { dg-note {OpenACC 'kernels' decomposition: variable 'gangs_max' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
664 { dg-note {variable 'gangs_max' made addressable} {} { target *-*-* } l_compute$c_compute } */
665 /* { dg-note {OpenACC 'kernels' decomposition: variable 'gangs_min' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
666 { dg-note {variable 'gangs_min' made addressable} {} { target *-*-* } l_compute$c_compute } */
668 #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
669 reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
670 /* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */
671 /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
672 /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
673 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
674 for (int i
= N
; i
> -N
; --i
)
676 /* This is to make the loop unparallelizable. */
677 asm volatile ("" : : : "memory");
679 gangs_min
= gangs_max
= acc_gang ();
680 workers_min
= workers_max
= acc_worker ();
681 vectors_min
= vectors_max
= acc_vector ();
684 if (gangs_min
!= 0 || gangs_max
!= 1 - 1
685 || workers_min
!= 0 || workers_max
!= 1 - 1
686 || vectors_min
!= 0 || vectors_max
!= 1 - 1)
691 /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
692 kernels even when there are explicit num_gangs, num_workers, or
693 vector_length clauses. */
698 int gangs_min
, gangs_max
, workers_min
, workers_max
, vectors_min
, vectors_max
;
699 gangs_min
= workers_min
= vectors_min
= INT_MAX
;
700 gangs_max
= workers_max
= vectors_max
= INT_MIN
;
701 #pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ \
703 num_workers (WORKERS) \
704 vector_length (VECTORS)
705 /* { dg-note {OpenACC 'kernels' decomposition: variable 'vectors_max' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
706 { dg-note {variable 'vectors_max' made addressable} {} { target *-*-* } l_compute$c_compute } */
707 /* { dg-note {OpenACC 'kernels' decomposition: variable 'vectors_min' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
708 { dg-note {variable 'vectors_min' made addressable} {} { target *-*-* } l_compute$c_compute } */
709 /* { dg-note {OpenACC 'kernels' decomposition: variable 'workers_max' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
710 { dg-note {variable 'workers_max' made addressable} {} { target *-*-* } l_compute$c_compute } */
711 /* { dg-note {OpenACC 'kernels' decomposition: variable 'workers_min' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
712 { dg-note {variable 'workers_min' made addressable} {} { target *-*-* } l_compute$c_compute } */
713 /* { dg-note {OpenACC 'kernels' decomposition: variable 'gangs_max' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
714 { dg-note {variable 'gangs_max' made addressable} {} { target *-*-* } l_compute$c_compute } */
715 /* { dg-note {OpenACC 'kernels' decomposition: variable 'gangs_min' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
716 { dg-note {variable 'gangs_min' made addressable} {} { target *-*-* } l_compute$c_compute } */
718 #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
719 reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
720 /* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */
721 /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
722 /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
723 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
724 for (int i
= N
; i
> -N
; --i
)
726 /* This is to make the loop unparallelizable. */
727 asm volatile ("" : : : "memory");
729 gangs_min
= gangs_max
= acc_gang ();
730 workers_min
= workers_max
= acc_worker ();
731 vectors_min
= vectors_max
= acc_vector ();
734 if (gangs_min
!= 0 || gangs_max
!= 1 - 1
735 || workers_min
!= 0 || workers_max
!= 1 - 1
736 || vectors_min
!= 0 || vectors_max
!= 1 - 1)
743 /* OpenACC serial construct. */
747 int gangs_min
, gangs_max
, workers_min
, workers_max
, vectors_min
, vectors_max
;
748 gangs_min
= workers_min
= vectors_min
= INT_MAX
;
749 gangs_max
= workers_max
= vectors_max
= INT_MIN
;
750 #pragma acc serial /* { dg-line l_compute[incr c_compute] } */ \
751 reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
752 /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
753 /* { dg-warning {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
755 for (int i
= N
; i
> -N
; i
--)
757 gangs_min
= gangs_max
= acc_gang ();
758 workers_min
= workers_max
= acc_worker ();
759 vectors_min
= vectors_max
= acc_vector ();
762 if (gangs_min
!= 0 || gangs_max
!= 1 - 1
763 || workers_min
!= 0 || workers_max
!= 1 - 1
764 || vectors_min
!= 0 || vectors_max
!= 1 - 1)
768 /* Composition of GP, WP, VP. */
770 int vectors_actual
= 1; /* Implicit 'vector_length (1)' clause. */
771 int gangs_min
, gangs_max
, workers_min
, workers_max
, vectors_min
, vectors_max
;
772 gangs_min
= workers_min
= vectors_min
= INT_MAX
;
773 gangs_max
= workers_max
= vectors_max
= INT_MIN
;
774 #pragma acc serial /* { dg-line l_compute[incr c_compute] } */ \
775 copy (vectors_actual) \
776 copy (gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max)
777 /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
778 /* { dg-bogus {warning: region contains gang partitioned code but is not gang partitioned} {TODO 'serial'} { xfail *-*-* } l_compute$c_compute }
779 { dg-bogus {warning: region contains worker partitioned code but is not worker partitioned} {TODO 'serial'} { xfail *-*-* } l_compute$c_compute }
780 { dg-bogus {warning: region contains vector partitioned code but is not vector partitioned} {TODO 'serial'} { xfail *-*-* } l_compute$c_compute } */
781 /* { dg-warning {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
783 if (acc_on_device (acc_device_nvidia
))
784 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
785 ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
787 /* The GCC nvptx back end enforces vector_length (32). */
788 /* It's unclear if that's actually permissible here;
789 <https://github.com/OpenACC/openacc-spec/issues/238> "OpenACC
790 'serial' construct might not actually be serial". */
793 #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
795 reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
796 /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
797 /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
798 /* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
799 for (int i
= N
; i
> -N
; i
--)
800 #pragma acc loop /* { dg-line l_loop_j[incr c_loop_j] } */ \
802 reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
803 /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_j$c_loop_j } */
804 /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_j$c_loop_j } */
805 /* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_j$c_loop_j } */
806 for (int j
= N
; j
> -N
; j
--)
807 #pragma acc loop /* { dg-line l_loop_k[incr c_loop_k] } */ \
809 reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
810 /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_k$c_loop_k } */
811 /* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_k$c_loop_k } */
812 for (int k
= N
* vectors_actual
; k
> -N
* vectors_actual
; k
--)
814 gangs_min
= gangs_max
= acc_gang ();
815 workers_min
= workers_max
= acc_worker ();
816 vectors_min
= vectors_max
= acc_vector ();
819 if (acc_get_device_type () == acc_device_nvidia
)
821 if (vectors_actual
!= 32)
825 if (vectors_actual
!= 1)
827 if (gangs_min
!= 0 || gangs_max
!= 1 - 1
828 || workers_min
!= 0 || workers_max
!= 1 - 1
829 || vectors_min
!= 0 || vectors_max
!= vectors_actual
- 1)