1 /* Tests of reduction on loop directive. */
6 /* Test of reduction on loop directive (gangs, non-private reduction
7 variable). */
11 int i
, arr
[1024], res
= 0, hres
= 0;
13 for (i
= 0; i
< 1024; i
++)
16 #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
18 #pragma acc loop gang reduction(+:res)
19 for (i
= 0; i
< 1024; i
++)
23 for (i
= 0; i
< 1024; i
++)
30 #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
32 #pragma acc loop gang reduction(*:res)
33 for (i
= 0; i
< 12; i
++)
37 for (i
= 0; i
< 12; i
++)
44 /* Test of reduction on loop directive (gangs and vectors, non-private
45 reduction variable). */
49 int i
, arr
[1024], res
= 0, hres
= 0;
51 for (i
= 0; i
< 1024; i
++)
54 #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
56 #pragma acc loop gang vector reduction(+:res)
57 for (i
= 0; i
< 1024; i
++)
61 for (i
= 0; i
< 1024; i
++)
68 /* Test of reduction on loop directive (gangs and workers, non-private
69 reduction variable). */
73 int i
, arr
[1024], res
= 0, hres
= 0;
75 for (i
= 0; i
< 1024; i
++)
78 #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
80 #pragma acc loop gang worker reduction(+:res)
81 for (i
= 0; i
< 1024; i
++)
85 for (i
= 0; i
< 1024; i
++)
92 /* Test of reduction on loop directive (gangs, workers and vectors, non-private
93 reduction variable). */
97 int i
, arr
[1024], res
= 0, hres
= 0;
99 for (i
= 0; i
< 1024; i
++)
102 #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
104 #pragma acc loop gang worker vector reduction(+:res)
105 for (i
= 0; i
< 1024; i
++)
109 for (i
= 0; i
< 1024; i
++)
112 assert (res
== hres
);
116 /* Test of reduction on loop directive (gangs, workers and vectors, non-private
117 reduction variable: separate gang and worker/vector loops). */
121 int i
, j
, arr
[32768], res
= 0, hres
= 0;
123 for (i
= 0; i
< 32768; i
++)
126 #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
128 #pragma acc loop gang reduction(+:res)
129 for (j
= 0; j
< 32; j
++)
131 #pragma acc loop worker vector reduction(+:res)
132 for (i
= 0; i
< 1024; i
++)
133 res
+= arr
[j
* 1024 + i
];
135 /* "res" is non-private, and is not available until after the parallel
136 region. */
139 for (i
= 0; i
< 32768; i
++)
142 assert (res
== hres
);
146 /* Test of reduction on loop directive (gangs, workers and vectors, non-private
147 reduction variable: separate gang and worker/vector loops). */
152 double arr
[32768], res
= 0, hres
= 0;
154 for (i
= 0; i
< 32768; i
++)
157 #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
160 #pragma acc loop gang reduction(+:res)
161 for (j
= 0; j
< 32; j
++)
163 #pragma acc loop worker vector reduction(+:res)
164 for (i
= 0; i
< 1024; i
++)
165 res
+= arr
[j
* 1024 + i
];
169 for (i
= 0; i
< 32768; i
++)
172 assert (res
== hres
);
176 /* Test of reduction on loop directive (gangs, workers and vectors, multiple
177 non-private reduction variables, float type). */
183 float res
= 0, mres
= 0, hres
= 0, hmres
= 0;
185 for (i
= 0; i
< 32768; i
++)
188 #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
190 #pragma acc loop gang reduction(+:res) reduction(max:mres)
191 for (j
= 0; j
< 32; j
++)
193 #pragma acc loop worker vector reduction(+:res) reduction(max:mres)
194 for (i
= 0; i
< 1024; i
++)
196 res
+= arr
[j
* 1024 + i
];
197 if (arr
[j
* 1024 + i
] > mres
)
198 mres
= arr
[j
* 1024 + i
];
201 #pragma acc loop worker vector reduction(+:res) reduction(max:mres)
202 for (i
= 0; i
< 1024; i
++)
204 res
+= arr
[j
* 1024 + (1023 - i
)];
205 if (arr
[j
* 1024 + (1023 - i
)] > mres
)
206 mres
= arr
[j
* 1024 + (1023 - i
)];
211 for (j
= 0; j
< 32; j
++)
212 for (i
= 0; i
< 1024; i
++)
214 hres
+= arr
[j
* 1024 + i
];
215 hres
+= arr
[j
* 1024 + (1023 - i
)];
216 if (arr
[j
* 1024 + i
] > hmres
)
217 hmres
= arr
[j
* 1024 + i
];
218 if (arr
[j
* 1024 + (1023 - i
)] > hmres
)
219 hmres
= arr
[j
* 1024 + (1023 - i
)];
222 assert (res
== hres
);
223 assert (mres
== hmres
);
227 /* Test of reduction on loop directive (vectors, private reduction
228 variable). */
232 int i
, j
, arr
[1024], out
[32], res
= 0, hres
= 0;
234 for (i
= 0; i
< 1024; i
++)
237 #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
238 private(res) copyout(out)
240 #pragma acc loop gang
241 for (j
= 0; j
< 32; j
++)
245 #pragma acc loop vector reduction(+:res)
246 for (i
= 0; i
< 32; i
++)
247 res
+= arr
[j
* 32 + i
];
253 for (j
= 0; j
< 32; j
++)
257 for (i
= 0; i
< 32; i
++)
258 hres
+= arr
[j
* 32 + i
];
260 assert (out
[j
] == hres
);
265 /* Test of reduction on loop directive (vector reduction in
266 gang-partitioned/worker-partitioned mode, private reduction variable). */
271 double ina
[1024], inb
[1024], out
[1024], acc
;
273 for (j
= 0; j
< 32; j
++)
274 for (i
= 0; i
< 32; i
++)
276 ina
[j
* 32 + i
] = (i
== j
) ? 2.0 : 0.0;
277 inb
[j
* 32 + i
] = (double) (i
+ j
);
280 #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
281 private(acc) copyin(ina, inb) copyout(out)
283 #pragma acc loop gang worker
284 for (k
= 0; k
< 32; k
++)
285 for (j
= 0; j
< 32; j
++)
289 #pragma acc loop vector reduction(+:acc)
290 for (i
= 0; i
< 32; i
++)
291 acc
+= ina
[k
* 32 + i
] * inb
[i
* 32 + j
];
293 out
[k
* 32 + j
] = acc
;
297 for (j
= 0; j
< 32; j
++)
298 for (i
= 0; i
< 32; i
++)
299 assert (out
[j
* 32 + i
] == (i
+ j
) * 2);
303 /* Test of reduction on loop directive (workers, private reduction
304 variable). */
308 int i
, j
, arr
[1024], out
[32], res
= 0, hres
= 0;
310 for (i
= 0; i
< 1024; i
++)
313 #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
314 private(res) copyout(out)
316 #pragma acc loop gang
317 for (j
= 0; j
< 32; j
++)
321 #pragma acc loop worker reduction(+:res)
322 for (i
= 0; i
< 32; i
++)
323 res
+= arr
[j
* 32 + i
];
329 for (j
= 0; j
< 32; j
++)
333 for (i
= 0; i
< 32; i
++)
334 hres
+= arr
[j
* 32 + i
];
336 assert (out
[j
] == hres
);
341 /* Test of reduction on loop directive (workers and vectors, private reduction
342 variable). */
346 int i
, j
, arr
[1024], out
[32], res
= 0, hres
= 0;
348 for (i
= 0; i
< 1024; i
++)
351 #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
352 private(res) copyout(out)
354 #pragma acc loop gang
355 for (j
= 0; j
< 32; j
++)
359 #pragma acc loop worker vector reduction(+:res)
360 for (i
= 0; i
< 32; i
++)
361 res
+= arr
[j
* 32 + i
];
367 for (j
= 0; j
< 32; j
++)
371 for (i
= 0; i
< 32; i
++)
372 hres
+= arr
[j
* 32 + i
];
374 assert (out
[j
] == hres
);
379 /* Test of reduction on loop directive (workers and vectors, private reduction
380 variable). */
384 int i
, j
, arr
[32768], out
[32], res
= 0, hres
= 0;
386 for (i
= 0; i
< 32768; i
++)
389 #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
390 private(res) copyout(out)
392 #pragma acc loop gang
393 for (j
= 0; j
< 32; j
++)
397 #pragma acc loop worker reduction(+:res)
398 for (i
= 0; i
< 1024; i
++)
399 res
+= arr
[j
* 1024 + i
];
401 #pragma acc loop vector reduction(+:res)
402 for (i
= 1023; i
>= 0; i
--)
403 res
+= arr
[j
* 1024 + i
];
409 for (j
= 0; j
< 32; j
++)
413 for (i
= 0; i
< 1024; i
++)
414 hres
+= arr
[j
* 1024 + i
] * 2;
416 assert (out
[j
] == hres
);
421 /* Test of reduction on loop directive (workers and vectors, private reduction
422 variable: gang-redundant mode). */
426 int i
, arr
[1024], out
[32], res
= 0, hres
= 0;
428 for (i
= 0; i
< 1024; i
++)
431 #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
432 private(res) copyin(arr) copyout(out)
434 /* Private variables aren't initialized by default in openacc. */
437 /* "res" should be available at the end of the following loop (and should
438 have the same value redundantly in each gang). */
439 #pragma acc loop worker vector reduction(+:res)
440 for (i
= 0; i
< 1024; i
++)
443 #pragma acc loop gang (static: 1)
444 for (i
= 0; i
< 32; i
++)
448 for (i
= 0; i
< 1024; i
++)
451 for (i
= 0; i
< 32; i
++)
452 assert (out
[i
] == hres
);