/* Set num_threads to 50 on 32-bit hppa in two libgomp loop tests
   [official-gcc.git] / libgomp / testsuite / libgomp.oacc-c-c++-common / asyncwait-1.c
   blob e91642c162e80b6b277da63dbfe2985ad026cc20  */
/* { dg-do run } */
/* { dg-additional-options "-DUSE_CUDA_H" { target openacc_cuda } } */
/* { dg-additional-options "-lcuda" { target { openacc_nvidia_accel_selected && openacc_cuda } } } */

#include <openacc.h>
#include <stdlib.h>
#if defined ACC_DEVICE_TYPE_nvidia && defined USE_CUDA_H
#include "cuda.h"
#endif

#include <stdio.h>
#include <sys/time.h>

/* Allocate host storage for COUNT floats.  Aborts when the allocation
   fails so that the fill and verify loops never touch a null pointer.  */
static float *
alloc_floats (int count)
{
  float *p = (float *) malloc (count * sizeof (float));

  if (p == NULL)
    abort ();

  return p;
}

/* Store VALUE into each of the first N elements of P.  */
static void
fill_array (float *p, int n, float value)
{
  int i;

  for (i = 0; i < n; i++)
    p[i] = value;
}

/* Abort unless each of the first N elements of P equals EXPECTED.  */
static void
check_array (const float *p, int n, float expected)
{
  int i;

  for (i = 0; i < n; i++)
    if (p[i] != expected)
      abort ();
}

/* Exercise the OpenACC async and wait clauses.

   The same nine scenarios are run twice: first with "parallel" compute
   constructs, then with "kernels" constructs.  Each scenario fills the
   host arrays, runs one or more asynchronous compute regions inside a
   data region, waits (via a wait directive, a wait clause, or an update
   directive), and then verifies the host arrays.  Any mismatch calls
   abort (); the function returns 0 when every scenario passes.

   ARGC and ARGV are unused.  */
int
main (int argc, char **argv)
{
#if defined ACC_DEVICE_TYPE_nvidia && defined USE_CUDA_H
  CUresult r;
  CUstream stream1;
#endif
  int N = 128; //1024 * 1024;
  float *a, *b, *c, *d, *e;

#if defined ACC_DEVICE_TYPE_nvidia && defined USE_CUDA_H
  acc_init (acc_device_nvidia);
#endif

  a = alloc_floats (N);
  b = alloc_floats (N);
  c = alloc_floats (N);
  d = alloc_floats (N);
  e = alloc_floats (N);

  /* ---- "parallel" constructs ---- */

  /* async without an argument, wait without an argument.  */
  fill_array (a, N, 3.0f);
  fill_array (b, N, 0.0f);

#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
  {
#pragma acc parallel async
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	b[ii] = a[ii];
    }

    /* The wait sits inside the data region, ahead of the region's end.  */
#pragma acc wait
  }

  check_array (a, N, 3.0f);
  check_array (b, N, 3.0f);

  /* async (1), wait (1).  */
  fill_array (a, N, 2.0f);
  fill_array (b, N, 0.0f);

#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
  {
#pragma acc parallel async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	b[ii] = a[ii];
    }

#pragma acc wait (1)
  }

  check_array (a, N, 2.0f);
  check_array (b, N, 2.0f);

  /* Three regions on async (1), then a single wait (1).  */
  fill_array (a, N, 3.0f);
  fill_array (b, N, 0.0f);
  fill_array (c, N, 0.0f);
  fill_array (d, N, 0.0f);

#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
  {
#pragma acc parallel async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
    }

#pragma acc parallel async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
    }

#pragma acc parallel async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
    }

#pragma acc wait (1)
  }

  check_array (a, N, 3.0f);
  check_array (b, N, 9.0f);
  check_array (c, N, 4.0f);
  check_array (d, N, 1.0f);

  /* As above, plus a fourth region carrying both wait (1) and async (1)
     that reads the results of the first three.  */
  fill_array (a, N, 2.0f);
  fill_array (b, N, 0.0f);
  fill_array (c, N, 0.0f);
  fill_array (d, N, 0.0f);
  fill_array (e, N, 0.0f);

#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
  {
#pragma acc parallel async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
    }

#pragma acc parallel async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
    }

#pragma acc parallel async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
    }

#pragma acc parallel wait (1) async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
    }

#pragma acc wait (1)
  }

  check_array (a, N, 2.0f);
  check_array (b, N, 4.0f);
  check_array (c, N, 4.0f);
  check_array (d, N, 1.0f);
  check_array (e, N, 11.0f);

#if defined ACC_DEVICE_TYPE_nvidia && defined USE_CUDA_H
  /* Bind a freshly created non-blocking CUDA stream to async queue 1 for
     the scenarios that follow.  */
  r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING);
  if (r != CUDA_SUCCESS)
    {
      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
      abort ();
    }

  acc_set_cuda_stream (1, stream1);
#endif

  /* async (1), wait (1) again, now after the stream setup above.  */
  fill_array (a, N, 5.0f);
  fill_array (b, N, 0.0f);

#pragma acc data copy (a[0:N], b[0:N]) copyin (N)
  {
#pragma acc parallel async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	b[ii] = a[ii];
    }

#pragma acc wait (1)
  }

  check_array (a, N, 5.0f);
  check_array (b, N, 5.0f);

  /* Three regions on async (1), then wait (1).  */
  fill_array (a, N, 7.0f);
  fill_array (b, N, 0.0f);
  fill_array (c, N, 0.0f);
  fill_array (d, N, 0.0f);

#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
  {
#pragma acc parallel async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
    }

#pragma acc parallel async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
    }

#pragma acc parallel async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
    }

#pragma acc wait (1)
  }

  check_array (a, N, 7.0f);
  check_array (b, N, 49.0f);
  check_array (c, N, 4.0f);
  check_array (d, N, 1.0f);

  /* Three regions on async (1), a fourth with wait (1) async (1).  */
  fill_array (a, N, 3.0f);
  fill_array (b, N, 0.0f);
  fill_array (c, N, 0.0f);
  fill_array (d, N, 0.0f);
  fill_array (e, N, 0.0f);

#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
  {
#pragma acc parallel async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
    }

#pragma acc parallel async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
    }

#pragma acc parallel async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
    }

#pragma acc parallel wait (1) async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
    }

#pragma acc wait (1)
  }

  check_array (a, N, 3.0f);
  check_array (b, N, 9.0f);
  check_array (c, N, 4.0f);
  check_array (d, N, 1.0f);
  check_array (e, N, 17.0f);

  /* Data is only copied in; results come back through an update
     directive that carries a wait (1) clause.  */
  fill_array (a, N, 4.0f);
  fill_array (b, N, 0.0f);
  fill_array (c, N, 0.0f);
  fill_array (d, N, 0.0f);
  fill_array (e, N, 0.0f);

#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
  {
#pragma acc parallel async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
    }

#pragma acc parallel async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
    }

#pragma acc update host (a[0:N], b[0:N], c[0:N]) wait (1)
  }

  check_array (a, N, 4.0f);
  check_array (b, N, 16.0f);
  check_array (c, N, 4.0f);

  /* Same, but the update itself is async (1) and is followed by an
     explicit wait (1).  */
  fill_array (a, N, 5.0f);
  fill_array (b, N, 0.0f);
  fill_array (c, N, 0.0f);
  fill_array (d, N, 0.0f);
  fill_array (e, N, 0.0f);

#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
  {
#pragma acc parallel async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
    }

#pragma acc parallel async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
    }

#pragma acc update host (a[0:N], b[0:N], c[0:N]) async (1)

#pragma acc wait (1)
  }

  check_array (a, N, 5.0f);
  check_array (b, N, 25.0f);
  check_array (c, N, 4.0f);

  /* ---- "kernels" constructs: the same scenarios once more ---- */

  /* async without an argument, wait without an argument.  */
  fill_array (a, N, 3.0f);
  fill_array (b, N, 0.0f);

#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
  {
#pragma acc kernels async
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	b[ii] = a[ii];
    }

#pragma acc wait
  }

  check_array (a, N, 3.0f);
  check_array (b, N, 3.0f);

  /* async (1), wait (1).  */
  fill_array (a, N, 2.0f);
  fill_array (b, N, 0.0f);

#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
  {
#pragma acc kernels async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	b[ii] = a[ii];
    }

#pragma acc wait (1)
  }

  check_array (a, N, 2.0f);
  check_array (b, N, 2.0f);

  /* Three regions on async (1), then a single wait (1).  */
  fill_array (a, N, 3.0f);
  fill_array (b, N, 0.0f);
  fill_array (c, N, 0.0f);
  fill_array (d, N, 0.0f);

#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
  {
#pragma acc kernels async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
    }

#pragma acc kernels async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
    }

#pragma acc kernels async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
    }

#pragma acc wait (1)
  }

  check_array (a, N, 3.0f);
  check_array (b, N, 9.0f);
  check_array (c, N, 4.0f);
  check_array (d, N, 1.0f);

  /* Three regions on async (1), a fourth with wait (1) async (1).  */
  fill_array (a, N, 2.0f);
  fill_array (b, N, 0.0f);
  fill_array (c, N, 0.0f);
  fill_array (d, N, 0.0f);
  fill_array (e, N, 0.0f);

#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
  {
#pragma acc kernels async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
    }

#pragma acc kernels async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
    }

#pragma acc kernels async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
    }

#pragma acc kernels wait (1) async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
    }

#pragma acc wait (1)
  }

  check_array (a, N, 2.0f);
  check_array (b, N, 4.0f);
  check_array (c, N, 4.0f);
  check_array (d, N, 1.0f);
  check_array (e, N, 11.0f);

#if defined ACC_DEVICE_TYPE_nvidia && defined USE_CUDA_H
  /* NOTE(review): this creates a second stream and rebinds async queue 1
     to it; the stream created earlier is not destroyed by this test.
     Confirm who owns a stream after acc_set_cuda_stream before adding a
     cuStreamDestroy call here.  */
  r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING);
  if (r != CUDA_SUCCESS)
    {
      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
      abort ();
    }

  acc_set_cuda_stream (1, stream1);
#endif

  /* async (1), wait (1) again, now after the stream setup above.  */
  fill_array (a, N, 5.0f);
  fill_array (b, N, 0.0f);

#pragma acc data copy (a[0:N], b[0:N]) copyin (N)
  {
#pragma acc kernels async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	b[ii] = a[ii];
    }

#pragma acc wait (1)
  }

  check_array (a, N, 5.0f);
  check_array (b, N, 5.0f);

  /* Three regions on async (1), then wait (1).  */
  fill_array (a, N, 7.0f);
  fill_array (b, N, 0.0f);
  fill_array (c, N, 0.0f);
  fill_array (d, N, 0.0f);

#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
  {
#pragma acc kernels async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
    }

#pragma acc kernels async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
    }

#pragma acc kernels async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
    }

#pragma acc wait (1)
  }

  check_array (a, N, 7.0f);
  check_array (b, N, 49.0f);
  check_array (c, N, 4.0f);
  check_array (d, N, 1.0f);

  /* Three regions on async (1), a fourth with wait (1) async (1).  */
  fill_array (a, N, 3.0f);
  fill_array (b, N, 0.0f);
  fill_array (c, N, 0.0f);
  fill_array (d, N, 0.0f);
  fill_array (e, N, 0.0f);

#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
  {
#pragma acc kernels async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
    }

#pragma acc kernels async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
    }

#pragma acc kernels async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
    }

#pragma acc kernels wait (1) async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
    }

#pragma acc wait (1)
  }

  check_array (a, N, 3.0f);
  check_array (b, N, 9.0f);
  check_array (c, N, 4.0f);
  check_array (d, N, 1.0f);
  check_array (e, N, 17.0f);

  /* Copy-in only; results come back through update host ... wait (1).  */
  fill_array (a, N, 4.0f);
  fill_array (b, N, 0.0f);
  fill_array (c, N, 0.0f);
  fill_array (d, N, 0.0f);
  fill_array (e, N, 0.0f);

#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
  {
#pragma acc kernels async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
    }

#pragma acc kernels async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
    }

#pragma acc update host (a[0:N], b[0:N], c[0:N]) wait (1)
  }

  check_array (a, N, 4.0f);
  check_array (b, N, 16.0f);
  check_array (c, N, 4.0f);

  /* Copy-in only; update host is async (1), followed by wait (1).  */
  fill_array (a, N, 5.0f);
  fill_array (b, N, 0.0f);
  fill_array (c, N, 0.0f);
  fill_array (d, N, 0.0f);
  fill_array (e, N, 0.0f);

#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
  {
#pragma acc kernels async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
    }

#pragma acc kernels async (1)
    {
      int ii;

      for (ii = 0; ii < N; ii++)
	c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
    }

#pragma acc update host (a[0:N], b[0:N], c[0:N]) async (1)

#pragma acc wait (1)
  }

  check_array (a, N, 5.0f);
  check_array (b, N, 25.0f);
  check_array (c, N, 4.0f);

  /* Every data region has ended, so the host buffers can be released.  */
  free (a);
  free (b);
  free (c);
  free (d);
  free (e);

#if defined ACC_DEVICE_TYPE_nvidia && defined USE_CUDA_H
  acc_shutdown (acc_device_nvidia);
#endif

  return 0;
}