PR c/81417
[official-gcc.git] / libgomp / testsuite / libgomp.oacc-c-c++-common / reduction-7.c
blobcc3cd07301ae206d2a90407f23b05f06215556d0
1 /* Tests of reduction on loop directive. */
3 #include <assert.h>
/* Test of reduction on loop directive (gangs, non-private reduction
   variable).  A sum over all 1024 elements and a product over a short
   run of elements are each reduced inside a parallel region and compared
   against the same reduction computed sequentially on the host.  */

void g_np_1()
{
  int i, arr[1024], res = 0, hres = 0;

  for (i = 0; i < 1024; i++)
    arr[i] = i;

  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
  {
    #pragma acc loop gang reduction(+:res)
    for (i = 0; i < 1024; i++)
      res += arr[i];
  }

  for (i = 0; i < 1024; i++)
    hres += arr[i];

  assert (res == hres);

  /* 1 is the identity for multiplication.  */
  res = hres = 1;

  /* Start at index 1: arr[0] is 0, which would force both products to 0
     and leave the comparison nearly blind to how the partial products
     were combined.  1 * 2 * ... * 11 = 39916800 fits in a 32-bit int.  */
  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
  {
    #pragma acc loop gang reduction(*:res)
    for (i = 1; i < 12; i++)
      res *= arr[i];
  }

  for (i = 1; i < 12; i++)
    hres *= arr[i];

  assert (res == hres);
}
/* Test of reduction on loop directive (gangs and vectors, non-private
   reduction variable).  The sum produced by the gang/vector loop must
   equal the sum accumulated sequentially on the host.  */

void gv_np_1()
{
  int data[1024];
  int idx;
  int dev_sum = 0;
  int host_sum = 0;

  /* Fill the input and build the host reference in the same pass.  */
  for (idx = 0; idx < 1024; idx++)
    {
      data[idx] = idx;
      host_sum += data[idx];
    }

  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
  {
    #pragma acc loop gang vector reduction(+:dev_sum)
    for (idx = 0; idx < 1024; idx++)
      dev_sum += data[idx];
  }

  assert (dev_sum == host_sum);
}
/* Test of reduction on loop directive (gangs and workers, non-private
   reduction variable).  The sum produced by the gang/worker loop is
   checked against a host-side sum of the same array.  */

void gw_np_1()
{
  int n, values[1024], total = 0, expected = 0;

  n = 0;
  while (n < 1024)
    {
      values[n] = n;
      n++;
    }

  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
  {
    #pragma acc loop gang worker reduction(+:total)
    for (n = 0; n < 1024; n++)
      total += values[n];
  }

  /* Host reference; integer addition, so the walk direction is
     irrelevant to the result.  */
  for (n = 1023; n >= 0; n--)
    expected += values[n];

  assert (total == expected);
}
/* Test of reduction on loop directive (gangs, workers and vectors,
   non-private reduction variable).  A single loop carries all three
   partitioning clauses; its sum is compared with a host-side sum.  */

void gwv_np_1()
{
  int src[1024];
  int k;
  int acc_total = 0, ref_total = 0;

  for (k = 0; k < 1024; k++)
    src[k] = k;

  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
  {
    #pragma acc loop gang worker vector reduction(+:acc_total)
    for (k = 0; k < 1024; k++)
      acc_total += src[k];
  }

  /* Host reference over the same 1024 elements.  */
  k = 0;
  do
    ref_total += src[k];
  while (++k < 1024);

  assert (acc_total == ref_total);
}
/* Test of reduction on loop directive (gangs, workers and vectors,
   non-private reduction variable: separate gang and worker/vector loops).
   The 32768-element input is treated as 32 rows of 1024 elements: the
   outer loop walks the rows, the inner loop the elements of one row.  */

void gwv_np_2()
{
  int col, row, data[32768], dev_sum = 0, host_sum = 0;

  for (col = 0; col < 32768; col++)
    data[col] = col;

  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
  {
    #pragma acc loop gang reduction(+:dev_sum)
    for (row = 0; row < 32; row++)
      {
        #pragma acc loop worker vector reduction(+:dev_sum)
        for (col = 0; col < 1024; col++)
          dev_sum += data[row * 1024 + col];

        /* "dev_sum" is non-private, and is not available until after the
           parallel region.  */
      }
  }

  /* Host reference, using the same row/column decomposition.  */
  for (row = 0; row < 32; row++)
    for (col = 0; col < 1024; col++)
      host_sum += data[row * 1024 + col];

  assert (dev_sum == host_sum);
}
/* Test of reduction on loop directive (gangs, workers and vectors,
   non-private reduction variable: separate gang and worker/vector loops).
   Same shape as the integer variant, but with double data and an explicit
   copyin of the input array.  */

void gwv_np_3()
{
  int outer, inner;
  double vals[32768], dev_sum = 0, host_sum = 0;

  /* Fill the input and accumulate the host reference in index order.  */
  for (inner = 0; inner < 32768; inner++)
    {
      vals[inner] = inner;
      host_sum += vals[inner];
    }

  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
                       copyin(vals)
  {
    #pragma acc loop gang reduction(+:dev_sum)
    for (outer = 0; outer < 32; outer++)
      {
        #pragma acc loop worker vector reduction(+:dev_sum)
        for (inner = 0; inner < 1024; inner++)
          dev_sum += vals[outer * 1024 + inner];
      }
  }

  assert (dev_sum == host_sum);
}
/* Test of reduction on loop directive (gangs, workers and vectors, multiple
   non-private reduction variables, float type).  Each row of 1024 elements
   is traversed twice, forwards and backwards, accumulating both a sum and a
   maximum; the host recomputes both with the two traversals interleaved.  */

void gwv_np_4()
{
  int i, j;
  float arr[32768];
  float res = 0, mres = 0, hres = 0, hmres = 0;

  /* Keep the inputs small (0 .. 511).  The host and the parallel region add
     the elements in different orders, and float addition is only
     order-independent while every partial sum is exactly representable,
     i.e. does not exceed 2^24 = 16777216.  With these values the grand
     total is 2 * 64 * (511 * 512 / 2) = 16744448.  */
  for (i = 0; i < 32768; i++)
    arr[i] = i % (32768 / 64);

  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
  {
    #pragma acc loop gang reduction(+:res) reduction(max:mres)
    for (j = 0; j < 32; j++)
      {
        #pragma acc loop worker vector reduction(+:res) reduction(max:mres)
        for (i = 0; i < 1024; i++)
          {
            res += arr[j * 1024 + i];
            if (arr[j * 1024 + i] > mres)
              mres = arr[j * 1024 + i];
          }

        #pragma acc loop worker vector reduction(+:res) reduction(max:mres)
        for (i = 0; i < 1024; i++)
          {
            res += arr[j * 1024 + (1023 - i)];
            if (arr[j * 1024 + (1023 - i)] > mres)
              mres = arr[j * 1024 + (1023 - i)];
          }
      }
  }

  for (j = 0; j < 32; j++)
    for (i = 0; i < 1024; i++)
      {
        hres += arr[j * 1024 + i];
        hres += arr[j * 1024 + (1023 - i)];
        if (arr[j * 1024 + i] > hmres)
          hmres = arr[j * 1024 + i];
        if (arr[j * 1024 + (1023 - i)] > hmres)
          hmres = arr[j * 1024 + (1023 - i)];
      }

  /* Guard the exactness assumption the equality check below relies on.  */
  assert (hres <= 16777216);
  assert (res == hres);
  assert (mres == hmres);
}
/* Test of reduction on loop directive (vectors, private reduction
   variable).  Each iteration of the gang loop sums one 32-element block of
   the input with a vector reduction and stores the block sum; the host
   then recomputes every block sum and compares.  */

void v_p_1()
{
  int lane, blk, input[1024], partial[32], part_sum = 0, want = 0;

  for (lane = 0; lane < 1024; lane++)
    input[lane] = lane;

  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
                       private(part_sum) copyout(partial)
  {
    #pragma acc loop gang
    for (blk = 0; blk < 32; blk++)
      {
        part_sum = 0;

        #pragma acc loop vector reduction(+:part_sum)
        for (lane = 0; lane < 32; lane++)
          part_sum += input[blk * 32 + lane];

        partial[blk] = part_sum;
      }
  }

  for (blk = 0; blk < 32; blk++)
    {
      /* First element of the block that partial[blk] summarises.  */
      const int *chunk = &input[blk * 32];

      want = 0;
      for (lane = 0; lane < 32; lane++)
        want += chunk[lane];

      assert (partial[blk] == want);
    }
}
/* Test of reduction on loop directive (vector reduction in
   gang-partitioned/worker-partitioned mode, private reduction variable).
   Computes a 32x32 matrix product prod = lhs * rhs, where lhs holds 2.0 on
   its diagonal and 0.0 elsewhere and rhs[r][c] = r + c, so every element
   of the product must equal 2 * (r + c).  */

void v_p_2()
{
  int dot, col, row;
  double lhs[1024], rhs[1024], prod[1024], cell;

  for (row = 0; row < 32; row++)
    for (col = 0; col < 32; col++)
      {
        if (row == col)
          lhs[row * 32 + col] = 2.0;
        else
          lhs[row * 32 + col] = 0.0;

        rhs[row * 32 + col] = (double) (row + col);
      }

  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
                       private(cell) copyin(lhs, rhs) copyout(prod)
  {
    #pragma acc loop gang worker
    for (row = 0; row < 32; row++)
      for (col = 0; col < 32; col++)
        {
          cell = 0;

          /* Dot product of one row of lhs with one column of rhs.  */
          #pragma acc loop vector reduction(+:cell)
          for (dot = 0; dot < 32; dot++)
            cell += lhs[row * 32 + dot] * rhs[dot * 32 + col];

          prod[row * 32 + col] = cell;
        }
  }

  for (row = 0; row < 32; row++)
    for (col = 0; col < 32; col++)
      assert (prod[row * 32 + col] == (col + row) * 2);
}
/* Test of reduction on loop directive (workers, private reduction
   variable).  Each iteration of the gang loop sums one 32-element block of
   the input with a worker reduction and stores the block sum; the host
   verifies every stored sum.  */

void w_p_1()
{
  int pos, blk, input[1024], sums[32], block_sum = 0, check = 0;

  for (pos = 0; pos < 1024; pos++)
    input[pos] = pos;

  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
                       private(block_sum) copyout(sums)
  {
    #pragma acc loop gang
    for (blk = 0; blk < 32; blk++)
      {
        block_sum = 0;

        #pragma acc loop worker reduction(+:block_sum)
        for (pos = 0; pos < 32; pos++)
          block_sum += input[blk * 32 + pos];

        sums[blk] = block_sum;
      }
  }

  /* One flat pass over the input: restart the running sum at the start of
     each block and compare it once the block's last element is added.  */
  for (pos = 0; pos < 1024; pos++)
    {
      if (pos % 32 == 0)
        check = 0;

      check += input[pos];

      if (pos % 32 == 31)
        assert (sums[pos / 32] == check);
    }
}
/* Test of reduction on loop directive (workers and vectors, private
   reduction variable).  Each iteration of the gang loop sums one
   32-element block with a combined worker/vector reduction; the host
   recomputes and compares each block sum.  */

void wv_p_1()
{
  int elem, grp, src[1024], result[32], local = 0, ref = 0;

  for (elem = 0; elem < 1024; elem++)
    src[elem] = elem;

  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
                       private(local) copyout(result)
  {
    #pragma acc loop gang
    for (grp = 0; grp < 32; grp++)
      {
        local = 0;

        #pragma acc loop worker vector reduction(+:local)
        for (elem = 0; elem < 32; elem++)
          local += src[grp * 32 + elem];

        result[grp] = local;
      }
  }

  grp = 0;
  while (grp < 32)
    {
      ref = 0;

      elem = 0;
      while (elem < 32)
        {
          ref += src[grp * 32 + elem];
          elem++;
        }

      assert (result[grp] == ref);
      grp++;
    }
}
/* Test of reduction on loop directive (workers and vectors, private
   reduction variable).  For every 1024-element row, the private
   accumulator starts at the row number, then the row is added once by a
   worker loop (ascending) and once more by a vector loop (descending).  */

void wv_p_2()
{
  int e, row, data[32768], got[32], accum = 0, row_sum = 0;

  for (e = 0; e < 32768; e++)
    data[e] = e;

  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
                       private(accum) copyout(got)
  {
    #pragma acc loop gang
    for (row = 0; row < 32; row++)
      {
        accum = row;

        #pragma acc loop worker reduction(+:accum)
        for (e = 0; e < 1024; e++)
          accum += data[row * 1024 + e];

        #pragma acc loop vector reduction(+:accum)
        for (e = 1023; e >= 0; e--)
          accum += data[row * 1024 + e];

        got[row] = accum;
      }
  }

  for (row = 0; row < 32; row++)
    {
      /* The row was accumulated twice on top of the initial row number.  */
      row_sum = 0;
      for (e = 0; e < 1024; e++)
        row_sum += data[row * 1024 + e];

      assert (got[row] == row + 2 * row_sum);
    }
}
/* Test of reduction on loop directive (workers and vectors, private
   reduction variable: gang-redundant mode).  The whole array is summed by
   a worker/vector loop that carries no gang clause, and the total is then
   written to all 32 output slots by a gang loop.  */

void wv_p_3()
{
  int n, src[1024], copies[32], total = 0, want = 0;

  /* Fill the input and accumulate the host reference in the same pass.  */
  for (n = 0; n < 1024; n++)
    {
      src[n] = n ^ 33;
      want += src[n];
    }

  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
                       private(total) copyin(src) copyout(copies)
  {
    /* Private variables aren't initialized by default in openacc.  */
    total = 0;

    /* "total" should be available at the end of the following loop (and
       should have the same value redundantly in each gang).  */
    #pragma acc loop worker vector reduction(+:total)
    for (n = 0; n < 1024; n++)
      total += src[n];

    #pragma acc loop gang (static: 1)
    for (n = 0; n < 32; n++)
      copies[n] = total;
  }

  for (n = 0; n < 32; n++)
    assert (copies[n] == want);
}
456 int main()
458 g_np_1();
459 gv_np_1();
460 gw_np_1();
461 gwv_np_1();
462 gwv_np_2();
463 gwv_np_3();
464 gwv_np_4();
465 v_p_1();
466 v_p_2();
467 w_p_1();
468 wv_p_1();
469 wv_p_2();
470 wv_p_3();
472 return 0;