3 /* Test of reduction on both parallel and loop directives (worker and
4 vector-partitioned loops individually in gang-partitioned mode, int type).  */
8 main (int argc
, char *argv
[])
10 int i
, j
, arr
[32768], res
= 0, hres
= 0;
12 for (i
= 0; i
< 32768; i
++)
15 #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
16 reduction(+:res) copy(res)
19 for (j
= 0; j
< 32; j
++)
21 #pragma acc loop worker reduction(+:res)
22 for (i
= 0; i
< 1024; i
++)
23 res
+= arr
[j
* 1024 + i
];
25 #pragma acc loop vector reduction(+:res)
26 for (i
= 1023; i
>= 0; i
--)
27 res
+= arr
[j
* 1024 + i
];
31 for (j
= 0; j
< 32; j
++)
32 for (i
= 0; i
< 1024; i
++)
33 hres
+= arr
[j
* 1024 + i
] * 2;