/* Test of reduction on both parallel and loop directives (workers and vectors
   together in gang-partitioned mode, float type). */
7 main (int argc
, char *argv
[])
11 float res
= 0, hres
= 0;
13 for (i
= 0; i
< 32768; i
++)
14 arr
[i
] = i
% (32768 / 64);
16 #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
17 reduction(+:res) copy(res)
19 #pragma acc loop gang /* { dg-warning "nested loop in reduction needs reduction clause for 'res'" "TODO" } */
20 for (j
= 0; j
< 32; j
++)
22 #pragma acc loop worker vector reduction(+:res)
23 for (i
= 0; i
< 1024; i
++)
24 res
+= arr
[j
* 1024 + i
];
26 #pragma acc loop worker vector reduction(+:res)
27 for (i
= 0; i
< 1024; i
++)
28 res
+= arr
[j
* 1024 + (1023 - i
)];
32 for (j
= 0; j
< 32; j
++)
33 for (i
= 0; i
< 1024; i
++)
35 hres
+= arr
[j
* 1024 + i
];
36 hres
+= arr
[j
* 1024 + (1023 - i
)];
39 assert (hres
<= 16777216);