PR c++/86342 - -Wdeprecated-copy and system headers.
[official-gcc.git] / libgomp / testsuite / libgomp.oacc-c-c++-common / parallel-dims.c
blob4a9854662cc5edccb05b15837c644bcc792a4989
1 /* OpenACC parallelism dimensions clauses: num_gangs, num_workers,
2 vector_length. */
4 #include <limits.h>
5 #include <openacc.h>
6 #include <gomp-constants.h>
8 /* TODO: "(int) acc_device_*" casts because of the C++ acc_on_device wrapper
9 not behaving as expected for -O0. */
10 #pragma acc routine seq
11 static unsigned int __attribute__ ((optimize ("O2"))) acc_gang ()
13 if (acc_on_device ((int) acc_device_host))
14 return 0;
15 else if (acc_on_device ((int) acc_device_nvidia))
16 return __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
17 else
18 __builtin_abort ();
21 #pragma acc routine seq
22 static unsigned int __attribute__ ((optimize ("O2"))) acc_worker ()
24 if (acc_on_device ((int) acc_device_host))
25 return 0;
26 else if (acc_on_device ((int) acc_device_nvidia))
27 return __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
28 else
29 __builtin_abort ();
32 #pragma acc routine seq
33 static unsigned int __attribute__ ((optimize ("O2"))) acc_vector ()
35 if (acc_on_device ((int) acc_device_host))
36 return 0;
37 else if (acc_on_device ((int) acc_device_nvidia))
38 return __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
39 else
40 __builtin_abort ();
44 int main ()
46 acc_init (acc_device_default);
48 /* Non-positive value. */
50 /* GR, WS, VS. */
52 #define GANGS 0 /* { dg-warning "'num_gangs' value must be positive" "" { target c } } */
53 int gangs_actual = GANGS;
54 int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
55 gangs_min = workers_min = vectors_min = INT_MAX;
56 gangs_max = workers_max = vectors_max = INT_MIN;
57 #pragma acc parallel copy (gangs_actual) \
58 reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) \
59 num_gangs (GANGS) /* { dg-warning "'num_gangs' value must be positive" "" { target c++ } } */
61 /* We're actually executing with num_gangs (1). */
62 gangs_actual = 1;
63 for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i)
65 /* <https://gcc.gnu.org/PR80547>. */
66 #if 0
67 gangs_min = gangs_max = acc_gang ();
68 workers_min = workers_max = acc_worker ();
69 vectors_min = vectors_max = acc_vector ();
70 #else
71 int gangs = acc_gang ();
72 gangs_min = (gangs_min < gangs) ? gangs_min : gangs;
73 gangs_max = (gangs_max > gangs) ? gangs_max : gangs;
74 int workers = acc_worker ();
75 workers_min = (workers_min < workers) ? workers_min : workers;
76 workers_max = (workers_max > workers) ? workers_max : workers;
77 int vectors = acc_vector ();
78 vectors_min = (vectors_min < vectors) ? vectors_min : vectors;
79 vectors_max = (vectors_max > vectors) ? vectors_max : vectors;
80 #endif
83 if (gangs_actual != 1)
84 __builtin_abort ();
85 if (gangs_min != 0 || gangs_max != gangs_actual - 1
86 || workers_min != 0 || workers_max != 0
87 || vectors_min != 0 || vectors_max != 0)
88 __builtin_abort ();
89 #undef GANGS
92 /* GP, WS, VS. */
94 #define GANGS 0 /* { dg-warning "'num_gangs' value must be positive" "" { target c } } */
95 int gangs_actual = GANGS;
96 int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
97 gangs_min = workers_min = vectors_min = INT_MAX;
98 gangs_max = workers_max = vectors_max = INT_MIN;
99 #pragma acc parallel copy (gangs_actual) \
100 num_gangs (GANGS) /* { dg-warning "'num_gangs' value must be positive" "" { target c++ } } */
102 /* We're actually executing with num_gangs (1). */
103 gangs_actual = 1;
104 #pragma acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
105 for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i)
107 gangs_min = gangs_max = acc_gang ();
108 workers_min = workers_max = acc_worker ();
109 vectors_min = vectors_max = acc_vector ();
112 if (gangs_actual != 1)
113 __builtin_abort ();
114 if (gangs_min != 0 || gangs_max != gangs_actual - 1
115 || workers_min != 0 || workers_max != 0
116 || vectors_min != 0 || vectors_max != 0)
117 __builtin_abort ();
118 #undef GANGS
121 /* GR, WP, VS. */
123 #define WORKERS 0 /* { dg-warning "'num_workers' value must be positive" "" { target c } } */
124 int workers_actual = WORKERS;
125 int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
126 gangs_min = workers_min = vectors_min = INT_MAX;
127 gangs_max = workers_max = vectors_max = INT_MIN;
128 #pragma acc parallel copy (workers_actual) \
129 num_workers (WORKERS) /* { dg-warning "'num_workers' value must be positive" "" { target c++ } } */
131 /* We're actually executing with num_workers (1). */
132 workers_actual = 1;
133 #pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
134 for (int i = 100 * workers_actual; i > -100 * workers_actual; --i)
136 gangs_min = gangs_max = acc_gang ();
137 workers_min = workers_max = acc_worker ();
138 vectors_min = vectors_max = acc_vector ();
141 if (workers_actual != 1)
142 __builtin_abort ();
143 if (gangs_min != 0 || gangs_max != 0
144 || workers_min != 0 || workers_max != workers_actual - 1
145 || vectors_min != 0 || vectors_max != 0)
146 __builtin_abort ();
147 #undef WORKERS
150 /* GR, WS, VP. */
152 #define VECTORS 0 /* { dg-warning "'vector_length' value must be positive" "" { target c } } */
153 int vectors_actual = VECTORS;
154 int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
155 gangs_min = workers_min = vectors_min = INT_MAX;
156 gangs_max = workers_max = vectors_max = INT_MIN;
157 #pragma acc parallel copy (vectors_actual) /* { dg-warning "using vector_length \\(32\\), ignoring 1" "" { target openacc_nvidia_accel_configured } } */ \
158 vector_length (VECTORS) /* { dg-warning "'vector_length' value must be positive" "" { target c++ } } */
160 /* We're actually executing with vector_length (1), just the GCC nvptx
161 back end enforces vector_length (32). */
162 if (acc_on_device (acc_device_nvidia))
163 vectors_actual = 32;
164 else
165 vectors_actual = 1;
166 #pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
167 for (int i = 100 * vectors_actual; i > -100 * vectors_actual; --i)
169 gangs_min = gangs_max = acc_gang ();
170 workers_min = workers_max = acc_worker ();
171 vectors_min = vectors_max = acc_vector ();
174 if (acc_get_device_type () == acc_device_nvidia)
176 if (vectors_actual != 32)
177 __builtin_abort ();
179 else
180 if (vectors_actual != 1)
181 __builtin_abort ();
182 if (gangs_min != 0 || gangs_max != 0
183 || workers_min != 0 || workers_max != 0
184 || vectors_min != 0 || vectors_max != vectors_actual - 1)
185 __builtin_abort ();
186 #undef VECTORS
190 /* High value. */
192 /* GR, WS, VS. */
194 /* There is no actual limit for the number of gangs, so we try with a
195 rather high value. */
196 int gangs = 12345;
197 int gangs_actual = gangs;
198 int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
199 gangs_min = workers_min = vectors_min = INT_MAX;
200 gangs_max = workers_max = vectors_max = INT_MIN;
201 #pragma acc parallel copy (gangs_actual) \
202 reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) \
203 num_gangs (gangs)
205 if (acc_on_device (acc_device_host))
207 /* We're actually executing with num_gangs (1). */
208 gangs_actual = 1;
210 /* As we're executing GR not GP, don't multiply with a "gangs_actual"
211 factor. */
212 for (int i = 100 /* * gangs_actual */; i > -100 /* * gangs_actual */; --i)
214 gangs_min = gangs_max = acc_gang ();
215 workers_min = workers_max = acc_worker ();
216 vectors_min = vectors_max = acc_vector ();
219 if (gangs_actual < 1)
220 __builtin_abort ();
221 if (gangs_min != 0 || gangs_max != gangs_actual - 1
222 || workers_min != 0 || workers_max != 0
223 || vectors_min != 0 || vectors_max != 0)
224 __builtin_abort ();
227 /* GP, WS, VS. */
229 /* There is no actual limit for the number of gangs, so we try with a
230 rather high value. */
231 int gangs = 12345;
232 int gangs_actual = gangs;
233 int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
234 gangs_min = workers_min = vectors_min = INT_MAX;
235 gangs_max = workers_max = vectors_max = INT_MIN;
236 #pragma acc parallel copy (gangs_actual) \
237 num_gangs (gangs)
239 if (acc_on_device (acc_device_host))
241 /* We're actually executing with num_gangs (1). */
242 gangs_actual = 1;
244 #pragma acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
245 for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i)
247 gangs_min = gangs_max = acc_gang ();
248 workers_min = workers_max = acc_worker ();
249 vectors_min = vectors_max = acc_vector ();
252 if (gangs_actual < 1)
253 __builtin_abort ();
254 if (gangs_min != 0 || gangs_max != gangs_actual - 1
255 || workers_min != 0 || workers_max != 0
256 || vectors_min != 0 || vectors_max != 0)
257 __builtin_abort ();
260 /* GR, WP, VS. */
262 /* We try with an outrageously large value. */
263 #define WORKERS 2 << 20
264 int workers_actual = WORKERS;
265 int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
266 gangs_min = workers_min = vectors_min = INT_MAX;
267 gangs_max = workers_max = vectors_max = INT_MIN;
268 #pragma acc parallel copy (workers_actual) /* { dg-warning "using num_workers \\(32\\), ignoring 2097152" "" { target openacc_nvidia_accel_configured } } */ \
269 num_workers (WORKERS)
271 if (acc_on_device (acc_device_host))
273 /* We're actually executing with num_workers (1). */
274 workers_actual = 1;
276 else if (acc_on_device (acc_device_nvidia))
278 /* The GCC nvptx back end enforces num_workers (32). */
279 workers_actual = 32;
281 else
282 __builtin_abort ();
283 #pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
284 for (int i = 100 * workers_actual; i > -100 * workers_actual; --i)
286 gangs_min = gangs_max = acc_gang ();
287 workers_min = workers_max = acc_worker ();
288 vectors_min = vectors_max = acc_vector ();
291 if (workers_actual < 1)
292 __builtin_abort ();
293 if (gangs_min != 0 || gangs_max != 0
294 || workers_min != 0 || workers_max != workers_actual - 1
295 || vectors_min != 0 || vectors_max != 0)
296 __builtin_abort ();
297 #undef WORKERS
300 /* GR, WP, VS. */
302 /* We try with an outrageously large value. */
303 int workers = 2 << 20;
304 /* For nvptx offloading, this one will not result in "using num_workers
305 (32), ignoring runtime setting", and will in fact try to launch with
306 "num_workers (workers)", which will run into "libgomp: cuLaunchKernel
307 error: invalid argument". So, limit ourselves here. */
308 if (acc_get_device_type () == acc_device_nvidia)
309 workers = 32;
310 int workers_actual = workers;
311 int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
312 gangs_min = workers_min = vectors_min = INT_MAX;
313 gangs_max = workers_max = vectors_max = INT_MIN;
314 #pragma acc parallel copy (workers_actual) \
315 num_workers (workers)
317 if (acc_on_device (acc_device_host))
319 /* We're actually executing with num_workers (1). */
320 workers_actual = 1;
322 else if (acc_on_device (acc_device_nvidia))
324 /* We're actually executing with num_workers (32). */
325 /* workers_actual = 32; */
327 else
328 __builtin_abort ();
329 #pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
330 for (int i = 100 * workers_actual; i > -100 * workers_actual; --i)
332 gangs_min = gangs_max = acc_gang ();
333 workers_min = workers_max = acc_worker ();
334 vectors_min = vectors_max = acc_vector ();
337 if (workers_actual < 1)
338 __builtin_abort ();
339 if (gangs_min != 0 || gangs_max != 0
340 || workers_min != 0 || workers_max != workers_actual - 1
341 || vectors_min != 0 || vectors_max != 0)
342 __builtin_abort ();
345 /* GR, WS, VP. */
347 /* We try with an outrageously large value. */
348 #define VECTORS 2 << 20
349 int vectors_actual = VECTORS;
350 int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
351 gangs_min = workers_min = vectors_min = INT_MAX;
352 gangs_max = workers_max = vectors_max = INT_MIN;
353 #pragma acc parallel copy (vectors_actual) /* { dg-warning "using vector_length \\(32\\), ignoring 2097152" "" { target openacc_nvidia_accel_configured } } */ \
354 vector_length (VECTORS)
356 if (acc_on_device (acc_device_host))
358 /* We're actually executing with vector_length (1). */
359 vectors_actual = 1;
361 else if (acc_on_device (acc_device_nvidia))
363 /* The GCC nvptx back end enforces vector_length (32). */
364 vectors_actual = 32;
366 else
367 __builtin_abort ();
368 #pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
369 for (int i = 100 * vectors_actual; i > -100 * vectors_actual; --i)
371 gangs_min = gangs_max = acc_gang ();
372 workers_min = workers_max = acc_worker ();
373 vectors_min = vectors_max = acc_vector ();
376 if (vectors_actual < 1)
377 __builtin_abort ();
378 if (gangs_min != 0 || gangs_max != 0
379 || workers_min != 0 || workers_max != 0
380 || vectors_min != 0 || vectors_max != vectors_actual - 1)
381 __builtin_abort ();
382 #undef VECTORS
385 /* GR, WS, VP. */
387 /* We try with an outrageously large value. */
388 int vectors = 2 << 20;
389 int vectors_actual = vectors;
390 int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
391 gangs_min = workers_min = vectors_min = INT_MAX;
392 gangs_max = workers_max = vectors_max = INT_MIN;
393 #pragma acc parallel copy (vectors_actual) /* { dg-warning "using vector_length \\(32\\), ignoring runtime setting" "" { target openacc_nvidia_accel_configured } } */ \
394 vector_length (vectors)
396 if (acc_on_device (acc_device_host))
398 /* We're actually executing with vector_length (1). */
399 vectors_actual = 1;
401 else if (acc_on_device (acc_device_nvidia))
403 /* The GCC nvptx back end enforces vector_length (32). */
404 vectors_actual = 32;
406 else
407 __builtin_abort ();
408 #pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
409 for (int i = 100 * vectors_actual; i > -100 * vectors_actual; --i)
411 gangs_min = gangs_max = acc_gang ();
412 workers_min = workers_max = acc_worker ();
413 vectors_min = vectors_max = acc_vector ();
416 if (vectors_actual < 1)
417 __builtin_abort ();
418 if (gangs_min != 0 || gangs_max != 0
419 || workers_min != 0 || workers_max != 0
420 || vectors_min != 0 || vectors_max != vectors_actual - 1)
421 __builtin_abort ();
425 /* Composition of GP, WP, VP. */
427 int gangs = 12345;
428 /* With nvptx offloading, multi-level reductions apparently are very slow
429 in the following case. So, limit ourselves here. */
430 if (acc_get_device_type () == acc_device_nvidia)
431 gangs = 3;
432 int gangs_actual = gangs;
433 #define WORKERS 3
434 int workers_actual = WORKERS;
435 #define VECTORS 11
436 int vectors_actual = VECTORS;
437 int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
438 gangs_min = workers_min = vectors_min = INT_MAX;
439 gangs_max = workers_max = vectors_max = INT_MIN;
440 #pragma acc parallel copy (gangs_actual, workers_actual, vectors_actual) /* { dg-warning "using vector_length \\(32\\), ignoring 11" "" { target openacc_nvidia_accel_configured } } */ \
441 num_gangs (gangs) \
442 num_workers (WORKERS) \
443 vector_length (VECTORS)
445 if (acc_on_device (acc_device_host))
447 /* We're actually executing with num_gangs (1), num_workers (1),
448 vector_length (1). */
449 gangs_actual = 1;
450 workers_actual = 1;
451 vectors_actual = 1;
453 else if (acc_on_device (acc_device_nvidia))
455 /* The GCC nvptx back end enforces vector_length (32). */
456 vectors_actual = 32;
458 else
459 __builtin_abort ();
460 #pragma acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
461 for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i)
462 #pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
463 for (int j = 100 * workers_actual; j > -100 * workers_actual; --j)
464 #pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
465 for (int k = 100 * vectors_actual; k > -100 * vectors_actual; --k)
467 gangs_min = gangs_max = acc_gang ();
468 workers_min = workers_max = acc_worker ();
469 vectors_min = vectors_max = acc_vector ();
472 if (gangs_min != 0 || gangs_max != gangs_actual - 1
473 || workers_min != 0 || workers_max != workers_actual - 1
474 || vectors_min != 0 || vectors_max != vectors_actual - 1)
475 __builtin_abort ();
476 #undef VECTORS
477 #undef WORKERS
481 /* We can't test parallelized OpenACC kernels constructs in this way: use of
482 the acc_gang, acc_worker, acc_vector functions will make the construct
483 unparallelizable. */
486 /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
487 kernels. */
489 int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
490 gangs_min = workers_min = vectors_min = INT_MAX;
491 gangs_max = workers_max = vectors_max = INT_MIN;
492 #pragma acc kernels
494 /* This is to make the OpenACC kernels construct unparallelizable. */
495 asm volatile ("" : : : "memory");
497 #pragma acc loop reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
498 for (int i = 100; i > -100; --i)
500 gangs_min = gangs_max = acc_gang ();
501 workers_min = workers_max = acc_worker ();
502 vectors_min = vectors_max = acc_vector ();
505 if (gangs_min != 0 || gangs_max != 1 - 1
506 || workers_min != 0 || workers_max != 1 - 1
507 || vectors_min != 0 || vectors_max != 1 - 1)
508 __builtin_abort ();
512 /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
513 kernels even when there are explicit num_gangs, num_workers, or
514 vector_length clauses. */
516 int gangs = 5;
517 #define WORKERS 5
518 #define VECTORS 13
519 int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
520 gangs_min = workers_min = vectors_min = INT_MAX;
521 gangs_max = workers_max = vectors_max = INT_MIN;
522 #pragma acc kernels \
523 num_gangs (gangs) \
524 num_workers (WORKERS) \
525 vector_length (VECTORS)
527 /* This is to make the OpenACC kernels construct unparallelizable. */
528 asm volatile ("" : : : "memory");
530 #pragma acc loop reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
531 for (int i = 100; i > -100; --i)
533 gangs_min = gangs_max = acc_gang ();
534 workers_min = workers_max = acc_worker ();
535 vectors_min = vectors_max = acc_vector ();
538 if (gangs_min != 0 || gangs_max != 1 - 1
539 || workers_min != 0 || workers_max != 1 - 1
540 || vectors_min != 0 || vectors_max != 1 - 1)
541 __builtin_abort ();
542 #undef VECTORS
543 #undef WORKERS
547 return 0;