Update 'Q' constraint documentation.
[official-gcc.git] / libgomp / testsuite / libgomp.oacc-c-c++-common / asyncwait-1.c
blob e780845a79300087df689df0e84a5085baa3817f
/* { dg-do run } */
/* { dg-additional-options "-lcuda" { target openacc_nvidia_accel_selected } } */

#include <openacc.h>
#include <stdlib.h>
#if defined ACC_DEVICE_TYPE_nvidia
#include "cuda.h"
#endif

#include <stdio.h>
#include <sys/time.h>
/* Store VAL into each of the first N elements of V.  */

static void
set_all (float *v, int n, float val)
{
    int i;

    for (i = 0; i < n; i++)
        v[i] = val;
}

/* Abort unless each of the first N elements of V compares equal to WANT.  */

static void
check_all (const float *v, int n, float want)
{
    int i;

    for (i = 0; i < n; i++)
        if (v[i] != want)
            abort ();
}

/* Exercise the OpenACC async and wait clauses and the wait directive.

   The same nine scenarios are run twice: first with "parallel" constructs,
   then with "kernels" constructs.  Each scenario fills the host arrays,
   runs one or more compute regions inside a data region, synchronizes, and
   then verifies the host copies; any mismatch calls abort ().

   When ACC_DEVICE_TYPE_nvidia is defined, a CUDA stream is additionally
   created and bound to async queue 1 part-way through each half.

   Returns 0 when every check passes.  */

int
main (int argc, char **argv)
{
#if defined ACC_DEVICE_TYPE_nvidia
    CUresult r;
    CUstream stream1;
#endif
    int N = 128; //1024 * 1024;
    float *a, *b, *c, *d, *e;
    int nbytes;

#if defined ACC_DEVICE_TYPE_nvidia
    acc_init (acc_device_nvidia);
#endif

    nbytes = N * sizeof (float);

    a = (float *) malloc (nbytes);
    b = (float *) malloc (nbytes);
    c = (float *) malloc (nbytes);
    d = (float *) malloc (nbytes);
    e = (float *) malloc (nbytes);

    /* Every buffer is written and read below, so fail loudly instead of
       dereferencing a null pointer.  */
    if (a == NULL || b == NULL || c == NULL || d == NULL || e == NULL)
    {
        fprintf (stderr, "malloc failed\n");
        abort ();
    }

    /* ---- parallel construct ---- */

    /* One async region without a queue argument, followed by a wait
       directive without an argument.  */
    set_all (a, N, 3.0);
    set_all (b, N, 0.0);

#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
    {

#pragma acc parallel async
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                b[ii] = a[ii];
        }

#pragma acc wait

    }

    check_all (a, N, 3.0);
    check_all (b, N, 3.0);

    /* One region on async queue 1, waited for by queue number.  */
    set_all (a, N, 2.0);
    set_all (b, N, 0.0);

#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
    {

#pragma acc parallel async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                b[ii] = a[ii];
        }

#pragma acc wait (1)

    }

    check_all (a, N, 2.0);
    check_all (b, N, 2.0);

    /* Three regions queued on async 1, then a single wait (1).  */
    set_all (a, N, 3.0);
    set_all (b, N, 0.0);
    set_all (c, N, 0.0);
    set_all (d, N, 0.0);

#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
    {

#pragma acc parallel async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
        }

#pragma acc parallel async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
        }

#pragma acc parallel async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
        }

#pragma acc wait (1)

    }

    check_all (a, N, 3.0);
    check_all (b, N, 9.0);
    check_all (c, N, 4.0);
    check_all (d, N, 1.0);

    /* As above, plus a fourth region carrying both wait (1) and async (1)
       that consumes the results of the first three.  */
    set_all (a, N, 2.0);
    set_all (b, N, 0.0);
    set_all (c, N, 0.0);
    set_all (d, N, 0.0);
    set_all (e, N, 0.0);

#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
    {

#pragma acc parallel async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
        }

#pragma acc parallel async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
        }

#pragma acc parallel async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
        }

#pragma acc parallel wait (1) async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
        }

#pragma acc wait (1)

    }

    check_all (a, N, 2.0);
    check_all (b, N, 4.0);
    check_all (c, N, 4.0);
    check_all (d, N, 1.0);
    check_all (e, N, 11.0);

    /* From here on async queue 1 is bound to an explicitly created CUDA
       stream (NVIDIA builds only).  */
#if defined ACC_DEVICE_TYPE_nvidia
    r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING);
    if (r != CUDA_SUCCESS)
    {
        fprintf (stderr, "cuStreamCreate failed: %d\n", r);
        abort ();
    }

    acc_set_cuda_stream (1, stream1);
#endif

    set_all (a, N, 5.0);
    set_all (b, N, 0.0);

#pragma acc data copy (a[0:N], b[0:N]) copyin (N)
    {

#pragma acc parallel async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                b[ii] = a[ii];
        }

#pragma acc wait (1)

    }

    check_all (a, N, 5.0);
    check_all (b, N, 5.0);

    set_all (a, N, 7.0);
    set_all (b, N, 0.0);
    set_all (c, N, 0.0);
    set_all (d, N, 0.0);

#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
    {

#pragma acc parallel async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
        }

#pragma acc parallel async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
        }

#pragma acc parallel async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
        }

#pragma acc wait (1)

    }

    check_all (a, N, 7.0);
    check_all (b, N, 49.0);
    check_all (c, N, 4.0);
    check_all (d, N, 1.0);

    set_all (a, N, 3.0);
    set_all (b, N, 0.0);
    set_all (c, N, 0.0);
    set_all (d, N, 0.0);
    set_all (e, N, 0.0);

#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
    {

#pragma acc parallel async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
        }

#pragma acc parallel async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
        }

#pragma acc parallel async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
        }

#pragma acc parallel wait (1) async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
        }

#pragma acc wait (1)

    }

    check_all (a, N, 3.0);
    check_all (b, N, 9.0);
    check_all (c, N, 4.0);
    check_all (d, N, 1.0);
    check_all (e, N, 17.0);

    /* The data region is copyin-only, so the results only reach the host
       through the update directive; here it carries a wait (1) clause.  */
    set_all (a, N, 4.0);
    set_all (b, N, 0.0);
    set_all (c, N, 0.0);
    set_all (d, N, 0.0);
    set_all (e, N, 0.0);

#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
    {

#pragma acc parallel async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
        }

#pragma acc parallel async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
        }

#pragma acc update host (a[0:N], b[0:N], c[0:N]) wait (1)

    }

    check_all (a, N, 4.0);
    check_all (b, N, 16.0);
    check_all (c, N, 4.0);

    /* Same, but the update itself is queued on async 1 and followed by a
       separate wait (1) directive.  */
    set_all (a, N, 5.0);
    set_all (b, N, 0.0);
    set_all (c, N, 0.0);
    set_all (d, N, 0.0);
    set_all (e, N, 0.0);

#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
    {

#pragma acc parallel async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
        }

#pragma acc parallel async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
        }

#pragma acc update host (a[0:N], b[0:N], c[0:N]) async (1)

#pragma acc wait (1)

    }

    check_all (a, N, 5.0);
    check_all (b, N, 25.0);
    check_all (c, N, 4.0);

    /* ---- kernels construct: the same scenarios again ---- */

    set_all (a, N, 3.0);
    set_all (b, N, 0.0);

#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
    {

#pragma acc kernels async
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                b[ii] = a[ii];
        }

#pragma acc wait

    }

    check_all (a, N, 3.0);
    check_all (b, N, 3.0);

    set_all (a, N, 2.0);
    set_all (b, N, 0.0);

#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
    {

#pragma acc kernels async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                b[ii] = a[ii];
        }

#pragma acc wait (1)

    }

    check_all (a, N, 2.0);
    check_all (b, N, 2.0);

    set_all (a, N, 3.0);
    set_all (b, N, 0.0);
    set_all (c, N, 0.0);
    set_all (d, N, 0.0);

#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
    {

#pragma acc kernels async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
        }

#pragma acc kernels async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
        }

#pragma acc kernels async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
        }

#pragma acc wait (1)

    }

    check_all (a, N, 3.0);
    check_all (b, N, 9.0);
    check_all (c, N, 4.0);
    check_all (d, N, 1.0);

    set_all (a, N, 2.0);
    set_all (b, N, 0.0);
    set_all (c, N, 0.0);
    set_all (d, N, 0.0);
    set_all (e, N, 0.0);

#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
    {

#pragma acc kernels async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
        }

#pragma acc kernels async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
        }

#pragma acc kernels async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
        }

#pragma acc kernels wait (1) async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
        }

#pragma acc wait (1)

    }

    check_all (a, N, 2.0);
    check_all (b, N, 4.0);
    check_all (c, N, 4.0);
    check_all (d, N, 1.0);
    check_all (e, N, 11.0);

    /* Bind async queue 1 to a newly created CUDA stream again.  Note that
       stream1 is overwritten here; the handle created earlier is not
       destroyed by this program.  */
#if defined ACC_DEVICE_TYPE_nvidia
    r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING);
    if (r != CUDA_SUCCESS)
    {
        fprintf (stderr, "cuStreamCreate failed: %d\n", r);
        abort ();
    }

    acc_set_cuda_stream (1, stream1);
#endif

    set_all (a, N, 5.0);
    set_all (b, N, 0.0);

#pragma acc data copy (a[0:N], b[0:N]) copyin (N)
    {

#pragma acc kernels async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                b[ii] = a[ii];
        }

#pragma acc wait (1)

    }

    check_all (a, N, 5.0);
    check_all (b, N, 5.0);

    set_all (a, N, 7.0);
    set_all (b, N, 0.0);
    set_all (c, N, 0.0);
    set_all (d, N, 0.0);

#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
    {

#pragma acc kernels async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
        }

#pragma acc kernels async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
        }

#pragma acc kernels async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
        }

#pragma acc wait (1)

    }

    check_all (a, N, 7.0);
    check_all (b, N, 49.0);
    check_all (c, N, 4.0);
    check_all (d, N, 1.0);

    set_all (a, N, 3.0);
    set_all (b, N, 0.0);
    set_all (c, N, 0.0);
    set_all (d, N, 0.0);
    set_all (e, N, 0.0);

#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
    {

#pragma acc kernels async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
        }

#pragma acc kernels async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
        }

#pragma acc kernels async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
        }

#pragma acc kernels wait (1) async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
        }

#pragma acc wait (1)

    }

    check_all (a, N, 3.0);
    check_all (b, N, 9.0);
    check_all (c, N, 4.0);
    check_all (d, N, 1.0);
    check_all (e, N, 17.0);

    set_all (a, N, 4.0);
    set_all (b, N, 0.0);
    set_all (c, N, 0.0);
    set_all (d, N, 0.0);
    set_all (e, N, 0.0);

#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
    {

#pragma acc kernels async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
        }

#pragma acc kernels async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
        }

#pragma acc update host (a[0:N], b[0:N], c[0:N]) wait (1)

    }

    check_all (a, N, 4.0);
    check_all (b, N, 16.0);
    check_all (c, N, 4.0);

    set_all (a, N, 5.0);
    set_all (b, N, 0.0);
    set_all (c, N, 0.0);
    set_all (d, N, 0.0);
    set_all (e, N, 0.0);

#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
    {

#pragma acc kernels async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
        }

#pragma acc kernels async (1)
        {
            int ii;

            for (ii = 0; ii < N; ii++)
                c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
        }

#pragma acc update host (a[0:N], b[0:N], c[0:N]) async (1)

#pragma acc wait (1)

    }

    check_all (a, N, 5.0);
    check_all (b, N, 25.0);
    check_all (c, N, 4.0);

    /* All data regions have ended, so the host buffers can be released.  */
    free (a);
    free (b);
    free (c);
    free (d);
    free (e);

#if defined ACC_DEVICE_TYPE_nvidia
    acc_shutdown (acc_device_nvidia);
#endif

    return 0;
}