Add -mcpu=power11 support.
[official-gcc.git] / libgomp / testsuite / libgomp.oacc-fortran / host_data-4.f90
blob0daba8bfcdcbf83ed5bbfa384743a59d71253179
1 ! Test host_data interoperability with CUDA blas using modules.
3 ! { dg-do run { target openacc_nvidia_accel_selected } }
4 ! { dg-additional-options "-lcublas -Wall -Wextra" }
5 ! { dg-require-effective-target openacc_cublas }
7 module cublas
8 interface
9 subroutine cublassaxpy(N, alpha, x, incx, y, incy) bind(c, name="cublasSaxpy")
10 use iso_c_binding
11 integer(kind=c_int), value :: N
12 real(kind=c_float), value :: alpha
13 type(*), dimension(*) :: x
14 integer(kind=c_int), value :: incx
15 type(*), dimension(*) :: y
16 integer(kind=c_int), value :: incy
17 end subroutine cublassaxpy
18 end interface
20 contains
21 subroutine saxpy (nn, aa, xx, yy)
22 integer :: nn
23 real*4 :: aa, xx(nn), yy(nn)
24 integer i
25 !$acc routine
27 do i = 1, nn
28 yy(i) = yy(i) + aa * xx(i)
29 end do
30 end subroutine saxpy
32 subroutine validate_results (n, a, b)
33 integer :: n
34 real*4 :: a(n), b(n)
36 do i = 1, N
37 if (abs(a(i) - b(i)) > 0.0001) stop 1
38 end do
39 end subroutine validate_results
40 end module cublas
42 program test
43 use cublas
44 implicit none
46 integer, parameter :: N = 10
47 integer :: i
48 real*4 :: x_ref(N), y_ref(N), x(N), y(N), a
50 a = 2.0
52 do i = 1, N
53 x(i) = 4.0 * i
54 y(i) = 3.0
55 x_ref(i) = x(i)
56 y_ref(i) = y(i)
57 end do
59 call saxpy (N, a, x_ref, y_ref)
61 !$acc data copyin (x) copy (y)
62 !$acc host_data use_device (x, y)
63 call cublassaxpy(N, a, x, 1, y, 1)
64 !$acc end host_data
65 !$acc end data
67 call validate_results (N, y, y_ref)
69 !$acc data create (x) copyout (y)
70 !$acc parallel loop
71 do i = 1, N
72 y(i) = 3.0
73 end do
74 !$acc end parallel loop
76 !$acc host_data use_device (x, y)
77 call cublassaxpy(N, a, x, 1, y, 1)
78 !$acc end host_data
79 !$acc end data
81 call validate_results (N, y, y_ref)
83 y(:) = 3.0
85 !$acc data copyin (x) copyin (a) copy (y)
86 !$acc parallel present (x) pcopy (y) present (a)
87 call saxpy (N, a, x, y)
88 !$acc end parallel
89 !$acc end data
91 call validate_results (N, y, y_ref)
93 y(:) = 3.0
95 !$acc enter data copyin (x, a, y)
96 !$acc parallel present (x) pcopy (y) present (a)
97 call saxpy (N, a, x, y)
98 !$acc end parallel
99 !$acc exit data delete (x, a) copyout (y)
101 call validate_results (N, y, y_ref)
102 end program test