/* Web-extraction header (gitweb page residue, not part of the test itself):
   Add support for ARMv8-R architecture
   [official-gcc.git] / libgomp / testsuite / libgomp.oacc-c-c++-common / asyncwait-1.c
   blob d478ce2eef5799ad2cfec3c9274c463c916e92eb  */
/* { dg-do run { target openacc_nvidia_accel_selected } } */
/* { dg-additional-options "-lcuda" } */
4 #include <openacc.h>
5 #include <stdlib.h>
6 #include "cuda.h"
8 #include <stdio.h>
9 #include <sys/time.h>
11 int
12 main (int argc, char **argv)
14 CUresult r;
15 CUstream stream1;
16 int N = 128; //1024 * 1024;
17 float *a, *b, *c, *d, *e;
18 int i;
19 int nbytes;
21 acc_init (acc_device_nvidia);
23 nbytes = N * sizeof (float);
25 a = (float *) malloc (nbytes);
26 b = (float *) malloc (nbytes);
27 c = (float *) malloc (nbytes);
28 d = (float *) malloc (nbytes);
29 e = (float *) malloc (nbytes);
31 for (i = 0; i < N; i++)
33 a[i] = 3.0;
34 b[i] = 0.0;
37 #pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
40 #pragma acc parallel async
42 int ii;
44 for (ii = 0; ii < N; ii++)
45 b[ii] = a[ii];
48 #pragma acc wait
52 for (i = 0; i < N; i++)
54 if (a[i] != 3.0)
55 abort ();
57 if (b[i] != 3.0)
58 abort ();
61 for (i = 0; i < N; i++)
63 a[i] = 2.0;
64 b[i] = 0.0;
67 #pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
70 #pragma acc parallel async (1)
72 int ii;
74 for (ii = 0; ii < N; ii++)
75 b[ii] = a[ii];
78 #pragma acc wait (1)
82 for (i = 0; i < N; i++)
84 if (a[i] != 2.0)
85 abort ();
87 if (b[i] != 2.0)
88 abort ();
91 for (i = 0; i < N; i++)
93 a[i] = 3.0;
94 b[i] = 0.0;
95 c[i] = 0.0;
96 d[i] = 0.0;
99 #pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
102 #pragma acc parallel async (1)
104 int ii;
106 for (ii = 0; ii < N; ii++)
107 b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
110 #pragma acc parallel async (1)
112 int ii;
114 for (ii = 0; ii < N; ii++)
115 c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
119 #pragma acc parallel async (1)
121 int ii;
123 for (ii = 0; ii < N; ii++)
124 d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
127 #pragma acc wait (1)
131 for (i = 0; i < N; i++)
133 if (a[i] != 3.0)
134 abort ();
136 if (b[i] != 9.0)
137 abort ();
139 if (c[i] != 4.0)
140 abort ();
142 if (d[i] != 1.0)
143 abort ();
146 for (i = 0; i < N; i++)
148 a[i] = 2.0;
149 b[i] = 0.0;
150 c[i] = 0.0;
151 d[i] = 0.0;
152 e[i] = 0.0;
155 #pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
158 #pragma acc parallel async (1)
160 int ii;
162 for (ii = 0; ii < N; ii++)
163 b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
166 #pragma acc parallel async (1)
168 int ii;
170 for (ii = 0; ii < N; ii++)
171 c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
174 #pragma acc parallel async (1)
176 int ii;
178 for (ii = 0; ii < N; ii++)
179 d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
182 #pragma acc parallel wait (1) async (1)
184 int ii;
186 for (ii = 0; ii < N; ii++)
187 e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
190 #pragma acc wait (1)
194 for (i = 0; i < N; i++)
196 if (a[i] != 2.0)
197 abort ();
199 if (b[i] != 4.0)
200 abort ();
202 if (c[i] != 4.0)
203 abort ();
205 if (d[i] != 1.0)
206 abort ();
208 if (e[i] != 11.0)
209 abort ();
213 r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING);
214 if (r != CUDA_SUCCESS)
216 fprintf (stderr, "cuStreamCreate failed: %d\n", r);
217 abort ();
220 acc_set_cuda_stream (1, stream1);
222 for (i = 0; i < N; i++)
224 a[i] = 5.0;
225 b[i] = 0.0;
228 #pragma acc data copy (a[0:N], b[0:N]) copyin (N)
231 #pragma acc parallel async (1)
233 int ii;
235 for (ii = 0; ii < N; ii++)
236 b[ii] = a[ii];
239 #pragma acc wait (1)
243 for (i = 0; i < N; i++)
245 if (a[i] != 5.0)
246 abort ();
248 if (b[i] != 5.0)
249 abort ();
252 for (i = 0; i < N; i++)
254 a[i] = 7.0;
255 b[i] = 0.0;
256 c[i] = 0.0;
257 d[i] = 0.0;
260 #pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
263 #pragma acc parallel async (1)
265 int ii;
267 for (ii = 0; ii < N; ii++)
268 b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
271 #pragma acc parallel async (1)
273 int ii;
275 for (ii = 0; ii < N; ii++)
276 c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
279 #pragma acc parallel async (1)
281 int ii;
283 for (ii = 0; ii < N; ii++)
284 d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
287 #pragma acc wait (1)
291 for (i = 0; i < N; i++)
293 if (a[i] != 7.0)
294 abort ();
296 if (b[i] != 49.0)
297 abort ();
299 if (c[i] != 4.0)
300 abort ();
302 if (d[i] != 1.0)
303 abort ();
306 for (i = 0; i < N; i++)
308 a[i] = 3.0;
309 b[i] = 0.0;
310 c[i] = 0.0;
311 d[i] = 0.0;
312 e[i] = 0.0;
315 #pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
318 #pragma acc parallel async (1)
320 int ii;
322 for (ii = 0; ii < N; ii++)
323 b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
326 #pragma acc parallel async (1)
328 int ii;
330 for (ii = 0; ii < N; ii++)
331 c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
334 #pragma acc parallel async (1)
336 int ii;
338 for (ii = 0; ii < N; ii++)
339 d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
342 #pragma acc parallel wait (1) async (1)
344 int ii;
346 for (ii = 0; ii < N; ii++)
347 e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
350 #pragma acc wait (1)
354 for (i = 0; i < N; i++)
356 if (a[i] != 3.0)
357 abort ();
359 if (b[i] != 9.0)
360 abort ();
362 if (c[i] != 4.0)
363 abort ();
365 if (d[i] != 1.0)
366 abort ();
368 if (e[i] != 17.0)
369 abort ();
372 for (i = 0; i < N; i++)
374 a[i] = 4.0;
375 b[i] = 0.0;
376 c[i] = 0.0;
377 d[i] = 0.0;
378 e[i] = 0.0;
381 #pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
384 #pragma acc parallel async (1)
386 int ii;
388 for (ii = 0; ii < N; ii++)
389 b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
392 #pragma acc parallel async (1)
394 int ii;
396 for (ii = 0; ii < N; ii++)
397 c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
400 #pragma acc update host (a[0:N], b[0:N], c[0:N]) wait (1)
404 for (i = 0; i < N; i++)
406 if (a[i] != 4.0)
407 abort ();
409 if (b[i] != 16.0)
410 abort ();
412 if (c[i] != 4.0)
413 abort ();
417 for (i = 0; i < N; i++)
419 a[i] = 5.0;
420 b[i] = 0.0;
421 c[i] = 0.0;
422 d[i] = 0.0;
423 e[i] = 0.0;
426 #pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
429 #pragma acc parallel async (1)
431 int ii;
433 for (ii = 0; ii < N; ii++)
434 b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
437 #pragma acc parallel async (1)
439 int ii;
441 for (ii = 0; ii < N; ii++)
442 c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
445 #pragma acc update host (a[0:N], b[0:N], c[0:N]) async (1)
447 #pragma acc wait (1)
451 for (i = 0; i < N; i++)
453 if (a[i] != 5.0)
454 abort ();
456 if (b[i] != 25.0)
457 abort ();
459 if (c[i] != 4.0)
460 abort ();
463 for (i = 0; i < N; i++)
465 a[i] = 3.0;
466 b[i] = 0.0;
469 #pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
472 #pragma acc kernels async
474 int ii;
476 for (ii = 0; ii < N; ii++)
477 b[ii] = a[ii];
480 #pragma acc wait
484 for (i = 0; i < N; i++)
486 if (a[i] != 3.0)
487 abort ();
489 if (b[i] != 3.0)
490 abort ();
493 for (i = 0; i < N; i++)
495 a[i] = 2.0;
496 b[i] = 0.0;
499 #pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
502 #pragma acc kernels async (1)
504 int ii;
506 for (ii = 0; ii < N; ii++)
507 b[ii] = a[ii];
510 #pragma acc wait (1)
514 for (i = 0; i < N; i++)
516 if (a[i] != 2.0)
517 abort ();
519 if (b[i] != 2.0)
520 abort ();
523 for (i = 0; i < N; i++)
525 a[i] = 3.0;
526 b[i] = 0.0;
527 c[i] = 0.0;
528 d[i] = 0.0;
531 #pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
534 #pragma acc kernels async (1)
536 int ii;
538 for (ii = 0; ii < N; ii++)
539 b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
542 #pragma acc kernels async (1)
544 int ii;
546 for (ii = 0; ii < N; ii++)
547 c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
551 #pragma acc kernels async (1)
553 int ii;
555 for (ii = 0; ii < N; ii++)
556 d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
559 #pragma acc wait (1)
563 for (i = 0; i < N; i++)
565 if (a[i] != 3.0)
566 abort ();
568 if (b[i] != 9.0)
569 abort ();
571 if (c[i] != 4.0)
572 abort ();
574 if (d[i] != 1.0)
575 abort ();
578 for (i = 0; i < N; i++)
580 a[i] = 2.0;
581 b[i] = 0.0;
582 c[i] = 0.0;
583 d[i] = 0.0;
584 e[i] = 0.0;
587 #pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
590 #pragma acc kernels async (1)
592 int ii;
594 for (ii = 0; ii < N; ii++)
595 b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
598 #pragma acc kernels async (1)
600 int ii;
602 for (ii = 0; ii < N; ii++)
603 c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
606 #pragma acc kernels async (1)
608 int ii;
610 for (ii = 0; ii < N; ii++)
611 d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
614 #pragma acc kernels wait (1) async (1)
616 int ii;
618 for (ii = 0; ii < N; ii++)
619 e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
622 #pragma acc wait (1)
626 for (i = 0; i < N; i++)
628 if (a[i] != 2.0)
629 abort ();
631 if (b[i] != 4.0)
632 abort ();
634 if (c[i] != 4.0)
635 abort ();
637 if (d[i] != 1.0)
638 abort ();
640 if (e[i] != 11.0)
641 abort ();
645 r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING);
646 if (r != CUDA_SUCCESS)
648 fprintf (stderr, "cuStreamCreate failed: %d\n", r);
649 abort ();
652 acc_set_cuda_stream (1, stream1);
654 for (i = 0; i < N; i++)
656 a[i] = 5.0;
657 b[i] = 0.0;
660 #pragma acc data copy (a[0:N], b[0:N]) copyin (N)
663 #pragma acc kernels async (1)
665 int ii;
667 for (ii = 0; ii < N; ii++)
668 b[ii] = a[ii];
671 #pragma acc wait (1)
675 for (i = 0; i < N; i++)
677 if (a[i] != 5.0)
678 abort ();
680 if (b[i] != 5.0)
681 abort ();
684 for (i = 0; i < N; i++)
686 a[i] = 7.0;
687 b[i] = 0.0;
688 c[i] = 0.0;
689 d[i] = 0.0;
692 #pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
695 #pragma acc kernels async (1)
697 int ii;
699 for (ii = 0; ii < N; ii++)
700 b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
703 #pragma acc kernels async (1)
705 int ii;
707 for (ii = 0; ii < N; ii++)
708 c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
711 #pragma acc kernels async (1)
713 int ii;
715 for (ii = 0; ii < N; ii++)
716 d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
719 #pragma acc wait (1)
723 for (i = 0; i < N; i++)
725 if (a[i] != 7.0)
726 abort ();
728 if (b[i] != 49.0)
729 abort ();
731 if (c[i] != 4.0)
732 abort ();
734 if (d[i] != 1.0)
735 abort ();
738 for (i = 0; i < N; i++)
740 a[i] = 3.0;
741 b[i] = 0.0;
742 c[i] = 0.0;
743 d[i] = 0.0;
744 e[i] = 0.0;
747 #pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
750 #pragma acc kernels async (1)
752 int ii;
754 for (ii = 0; ii < N; ii++)
755 b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
758 #pragma acc kernels async (1)
760 int ii;
762 for (ii = 0; ii < N; ii++)
763 c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
766 #pragma acc kernels async (1)
768 int ii;
770 for (ii = 0; ii < N; ii++)
771 d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
774 #pragma acc kernels wait (1) async (1)
776 int ii;
778 for (ii = 0; ii < N; ii++)
779 e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
782 #pragma acc wait (1)
786 for (i = 0; i < N; i++)
788 if (a[i] != 3.0)
789 abort ();
791 if (b[i] != 9.0)
792 abort ();
794 if (c[i] != 4.0)
795 abort ();
797 if (d[i] != 1.0)
798 abort ();
800 if (e[i] != 17.0)
801 abort ();
804 for (i = 0; i < N; i++)
806 a[i] = 4.0;
807 b[i] = 0.0;
808 c[i] = 0.0;
809 d[i] = 0.0;
810 e[i] = 0.0;
813 #pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
816 #pragma acc kernels async (1)
818 int ii;
820 for (ii = 0; ii < N; ii++)
821 b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
824 #pragma acc kernels async (1)
826 int ii;
828 for (ii = 0; ii < N; ii++)
829 c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
832 #pragma acc update host (a[0:N], b[0:N], c[0:N]) wait (1)
836 for (i = 0; i < N; i++)
838 if (a[i] != 4.0)
839 abort ();
841 if (b[i] != 16.0)
842 abort ();
844 if (c[i] != 4.0)
845 abort ();
849 for (i = 0; i < N; i++)
851 a[i] = 5.0;
852 b[i] = 0.0;
853 c[i] = 0.0;
854 d[i] = 0.0;
855 e[i] = 0.0;
858 #pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
861 #pragma acc kernels async (1)
863 int ii;
865 for (ii = 0; ii < N; ii++)
866 b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
869 #pragma acc kernels async (1)
871 int ii;
873 for (ii = 0; ii < N; ii++)
874 c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
877 #pragma acc update host (a[0:N], b[0:N], c[0:N]) async (1)
879 #pragma acc wait (1)
883 for (i = 0; i < N; i++)
885 if (a[i] != 5.0)
886 abort ();
888 if (b[i] != 25.0)
889 abort ();
891 if (c[i] != 4.0)
892 abort ();
895 acc_shutdown (acc_device_nvidia);
897 return 0;