libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c

   1 #include <assert.h>
   2
   3 /* Test of reduction on both parallel and loop directives (workers and vectors
   4    together in gang-partitioned mode, float type, multiple reductions).  */
   5
   6 int
   7 main (int argc, char *argv[])
   8 {
       /* Test driver.  The sum and the maximum of arr are computed twice:
          res/mres inside the OpenACC parallel region, hres/hmres by the
          plain loop nest that follows it.  The two pairs are compared by the
          asserts at the end; main returns 0 if they all hold.  argc and argv
          are not used.  */
   9   int i, j;
  10   float arr[32768];
  11   float res = 0, mres = 0, hres = 0, hmres = 0;
  12
       /* Fill the input with the repeating pattern 0, 1, ..., 511
          (32768 / 64 == 512), stored as float.  */
  13   for (i = 0; i < 32768; i++)
  14     arr[i] = i % (32768 / 64);
  15
       /* The parallel construct asks for 32 gangs, 32 workers and a vector
          length of 32.  It carries both reductions (+ on res, max on mres)
          and also lists res and mres in a copy clause.  */
  16   #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
  17     reduction(+:res) reduction(max:mres) copy(res, mres)
  18   {
         /* The gang loop has no reduction clause of its own.  The DejaGnu
            directive on the pragma line records the warning expected for
            res and mres because of that; it is tagged "TODO".  The directive
            has to stay on the same line as the pragma.  */
  19     #pragma acc loop gang /* { dg-warning "nested loop in reduction needs reduction clause for 'm\?res'" "TODO" } */
  20     for (j = 0; j < 32; j++)
  21       {
             /* First inner loop: each j selects the 1024-element slice
                starting at j * 1024, read here in ascending index order by
                a combined worker/vector loop with both reductions.  */
  22         #pragma acc loop worker vector reduction(+:res) reduction(max:mres)
  23         for (i = 0; i < 1024; i++)
  24           {
  25             res += arr[j * 1024 + i];
  26             if (arr[j * 1024 + i] > mres)
  27               mres = arr[j * 1024 + i];
  28           }
  29
             /* Second inner loop: same slice and same clauses, but read in
                descending index order (1023 - i), so every element is added
                to res a second time.  */
  30         #pragma acc loop worker vector reduction(+:res) reduction(max:mres)
  31         for (i = 0; i < 1024; i++)
  32           {
  33             res += arr[j * 1024 + (1023 - i)];
  34             if (arr[j * 1024 + (1023 - i)] > mres)
  35               mres = arr[j * 1024 + (1023 - i)];
  36           }
  37       }
  38   }
  39
       /* Reference computation with no OpenACC directives.  A single loop
          body performs both the ascending and the descending access, so
          hres also counts every element twice.  */
  40   for (j = 0; j < 32; j++)
  41     for (i = 0; i < 1024; i++)
  42       {
  43         hres += arr[j * 1024 + i];
  44         hres += arr[j * 1024 + (1023 - i)];
  45         if (arr[j * 1024 + i] > hmres)
  46           hmres = arr[j * 1024 + i];
  47         if (arr[j * 1024 + (1023 - i)] > hmres)
  48           hmres = arr[j * 1024 + (1023 - i)];
  49       }
  50
       /* 16777216 is 2^24.  NOTE(review): presumably the bound checks are
          here so the reference values stay within the range where a float
          represents every integer exactly, which makes the == comparisons
          below meaningful regardless of summation order -- confirm.  */
  51   assert (hres <= 16777216);
  52   assert (res == hres);
  53
  54   assert (hmres <= 16777216);
  55   assert (mres == hmres);
  56
  57   return 0;
  58 }