1 ! Test host_data interoperability with CUDA blas using modules.
3 ! { dg-do run { target openacc_nvidia_accel_selected } }
4 ! { dg-additional-options "-lcublas -Wall -Wextra" }
!> Explicit interface to the C routine `cublasSaxpy`.
!!
!! The binding label maps this Fortran name onto the C symbol "cublasSaxpy",
!! so `call cublassaxpy(...)` invokes the library routine directly.  The
!! cuBLAS documentation describes the operation as y = alpha * x + y.
!!
!! Arguments (scalars carry the VALUE attribute, i.e. are passed by value):
!!   N     - C int; number of elements (per the cuBLAS documentation).
!!   alpha - C float; scalar multiplier.
!!   x     - assumed-type, assumed-size buffer; passed by address with no
!!           type or bounds checking on the Fortran side.
!!   incx  - C int; stride between consecutive elements of x.
!!   y     - assumed-type, assumed-size buffer, same passing rules as x.
!!   incy  - C int; stride between consecutive elements of y.
subroutine cublassaxpy(N, alpha, x, incx, y, incy) &
    bind(c, name="cublasSaxpy")
  ! Bring the C kind parameters into scope locally: an interface body does
  ! not inherit names from its host, so c_int / c_float must be made
  ! visible here rather than relied upon from the enclosing scope.
  use, intrinsic :: iso_c_binding, only: c_int, c_float
  implicit none
  integer(kind=c_int), value :: N
  real(kind=c_float), value :: alpha
  type(*), dimension(*) :: x
  integer(kind=c_int), value :: incx
  type(*), dimension(*) :: y
  integer(kind=c_int), value :: incy
end subroutine cublassaxpy
! saxpy: the visible update statement is yy(i) = yy(i) + aa * xx(i), i.e. the
! aa-scaled element of xx is accumulated into yy in place.  aa is a real*4
! scalar; xx and yy are real*4 arrays of extent nn.
! NOTE(review): the declarations of nn and i, the loop that drives i, and the
! closing "end subroutine" are not visible in this excerpt - confirm against
! the full file before changing this routine.
20 subroutine saxpy (nn
, aa
, xx
, yy
)
22 real*4 :: aa
, xx(nn
), yy(nn
)
27 yy(i
) = yy(i
) + aa
* xx(i
)
! validate_results: calls abort when an element pair a(i), b(i) differs by
! more than 0.0001 in absolute value.
! NOTE(review): the declarations of n, a, b and i and the loop over i are not
! visible in this excerpt - confirm the iteration range against the full file.
! NOTE(review): "call abort" is a GNU extension rather than standard Fortran;
! consider "stop 1" or "error stop" if portability matters.
31 subroutine validate_results (n
, a
, b
)
36 if (abs(a(i
) - b(i
)) > 0.0001) call abort
38 end subroutine validate_results
! Problem size: every array declared below has N = 10 elements.
45 integer, parameter :: N
= 10
! x and y are the operands of the OpenACC / cublassaxpy cases further down;
! x_ref and y_ref are the operands of the saxpy call below, and y_ref is the
! comparand handed to validate_results; a is the scalar multiplier.
47 real*4 :: x_ref(N
), y_ref(N
), x(N
), y(N
), a
! Reference computation: saxpy updates y_ref in place.  This call is not
! enclosed by any OpenACC directive visible in this excerpt.
! NOTE(review): the initialisation of a, x, y, x_ref and y_ref is not visible
! here - confirm it against the full file.
58 call saxpy (N
, a
, x_ref
, y_ref
)
! Case 1: structured data region.  x is copied to the device on entry
! (copyin); y is copied to the device on entry and back to the host on exit
! (copy).
60 !$acc data copyin (x) copy (y)
! Within host_data, x and y refer to their device addresses, so the library
! call below receives device pointers rather than host arrays.
61 !$acc host_data use_device (x, y)
62 call cublassaxpy(N
, a
, x
, 1, y
, 1)
! NOTE(review): the matching "end host_data" / "end data" directives are not
! visible in this excerpt.
! Check y against the reference result y_ref.
66 call validate_results (N
, y
, y_ref
)
! Case 2: x is only allocated on the device (create performs no host-to-device
! copy); y is copied back to the host on exit (copyout).
! NOTE(review): because create does not transfer host data, whether the device
! copy of x holds defined values at the library call depends on code that is
! not visible in this excerpt - confirm.
68 !$acc data create (x) copyout (y)
! NOTE(review): the "parallel loop" construct closed by the next directive,
! and its loop body, are not visible in this excerpt.
73 !$acc end parallel loop
! As in the previous case, host_data exposes the device addresses of x and y
! to the library call.
75 !$acc host_data use_device (x, y)
76 call cublassaxpy(N
, a
, x
, 1, y
, 1)
! Check y against the reference result y_ref.
80 call validate_results (N
, y
, y_ref
)
! Case 3: no library call; saxpy itself is invoked from a compute construct.
! The data region copies x and a to the device and copies y in and back out.
84 !$acc data copyin (x) copyin (a) copy (y)
! The parallel construct requires x and a to be present on the device already
! (present) and uses present-or-copy semantics for y (pcopy).
85 !$acc parallel present (x) pcopy (y) present (a)
86 call saxpy (N
, a
, x
, y
)
! NOTE(review): the matching "end parallel" / "end data" directives are not
! visible in this excerpt.
! Check y against the reference result y_ref.
90 call validate_results (N
, y
, y_ref
)
! Case 4: same compute construct as the previous case, but the device data
! lifetime is managed with unstructured enter data / exit data directives.
! enter data copies x, a and y to the device.
94 !$acc enter data copyin (x, a, y)
! x and a must already be present; y uses present-or-copy semantics (pcopy).
95 !$acc parallel present (x) pcopy (y) present (a)
96 call saxpy (N
, a
, x
, y
)
! NOTE(review): the matching "end parallel" directive is not visible in this
! excerpt.
! exit data drops the device copies of x and a without copying them back and
! copies y back to the host.
98 !$acc exit data delete (x, a) copyout (y)
! Check y against the reference result y_ref.
100 call validate_results (N
, y
, y_ref
)