1 ! Test host_data interoperability with CUDA blas using modules.
3 ! { dg-do run { target openacc_nvidia_accel_selected } }
4 ! { dg-additional-options "-lcublas -Wall -Wextra" }
5 ! { dg-require-effective-target openacc_cublas }
! Explicit interface to the C symbol "cublasSaxpy".
!
! Every scalar is passed by value and both vectors are declared
! assumed-type, assumed-size, so whatever address the caller supplies is
! forwarded unchanged and without type or shape checking.  The test calls
! this inside "host_data use_device" regions, i.e. with device addresses.
!
! Arguments:
!   N     - number of elements to process
!   alpha - scalar multiplier
!   x     - input vector
!   incx  - stride between consecutive elements of x
!   y     - vector updated by the call
!   incy  - stride between consecutive elements of y
!
! NOTE(review): the by-value scalar arguments presumably match the legacy
! cuBLAS API (cublas.h) rather than the handle-based v2 API - confirm
! against the library this test links with.
subroutine cublassaxpy(N, alpha, x, incx, y, incy) bind(c, name="cublasSaxpy")
  ! An interface body does not inherit names from its host, so the C kind
  ! parameters have to be brought into scope here.
  use, intrinsic :: iso_c_binding, only: c_int, c_float
  implicit none
  integer(kind=c_int), value :: N
  real(kind=c_float), value :: alpha
  type(*), dimension(*) :: x
  integer(kind=c_int), value :: incx
  type(*), dimension(*) :: y
  integer(kind=c_int), value :: incy
end subroutine cublassaxpy
! Reference SAXPY: yy(i) = yy(i) + aa * xx(i) for i = 1 .. nn.
!
! Arguments:
!   nn - number of elements in xx and yy
!   aa - scalar multiplier
!   xx - input vector, read only
!   yy - vector updated in place
!
! The test calls this both from plain host code and from inside
! "!$acc parallel" regions, so it is also compiled as an OpenACC routine.
subroutine saxpy (nn, aa, xx, yy)
  implicit none
  integer, intent(in) :: nn
  real*4, intent(in) :: aa, xx(nn)
  real*4, intent(inout) :: yy(nn)
  integer :: i
  !$acc routine

  do i = 1, nn
    yy(i) = yy(i) + aa * xx(i)
  end do
end subroutine saxpy
! Compare two result vectors element by element.
!
! Arguments:
!   n - number of elements to compare
!   a - computed values
!   b - expected values
!
! Terminates the program with "stop 1" at the first index where the two
! vectors differ by more than 0.0001; returns normally otherwise.
subroutine validate_results (n, a, b)
  implicit none
  integer, intent(in) :: n
  real*4, intent(in) :: a(n), b(n)
  integer :: i

  do i = 1, n
    if (abs(a(i) - b(i)) > 0.0001) stop 1
  end do
end subroutine validate_results
! Extent of every array used by the test.
46 integer, parameter :: N
= 10
! x and y are the operands handed to the OpenACC / cuBLAS code paths.
! x_ref and y_ref are the operands of the reference saxpy call made outside
! any OpenACC construct; y is later validated against y_ref.
! a is the scalar multiplier passed to both saxpy and cublassaxpy.
48 real*4 :: x_ref(N
), y_ref(N
), x(N
), y(N
), a
! Reference result: y_ref = y_ref + a * x_ref, computed by the plain
! saxpy call (no OpenACC construct surrounds it).
59 call saxpy (N
, a
, x_ref
, y_ref
)
! Case 1: structured data region.  copyin (x) makes x available on the
! device; copy (y) transfers y to the device and back to the host when the
! region ends.
61 !$acc data copyin (x) copy (y)
! Within host_data, x and y refer to their device addresses, so the
! library routine receives device pointers.
! NOTE(review): the matching "end host_data" / "end data" directives are
! not visible in this excerpt.
62 !$acc host_data use_device (x, y)
63 call cublassaxpy(N
, a
, x
, 1, y
, 1)
! The result computed through cublassaxpy must match the reference within
! the tolerance applied by validate_results.
67 call validate_results (N
, y
, y_ref
)
! Case 2: create (x) allocates x on the device without copying the host
! values; copyout (y) copies y back to the host when the region ends.
! NOTE(review): whatever populates the device copies inside this region is
! not visible in this excerpt - confirm against the full file.
69 !$acc data create (x) copyout (y)
! Closes a parallel loop whose opening directive and body are not visible
! in this excerpt.
74 !$acc end parallel loop
! As in the first case, x and y denote device addresses inside host_data.
76 !$acc host_data use_device (x, y)
77 call cublassaxpy(N
, a
, x
, 1, y
, 1)
! Check the result against the reference.
81 call validate_results (N
, y
, y_ref
)
! Case 3: saxpy is called from inside an OpenACC parallel construct instead
! of going through cublassaxpy.  The enclosing data region copies x and a
! to the device and copies y in and back out.
85 !$acc data copyin (x) copyin (a) copy (y)
! present (x) / present (a) require the data to be on the device already;
! pcopy (y) (present_or_copy) only transfers y if it is not already there.
! NOTE(review): the matching "end parallel" / "end data" directives are not
! visible in this excerpt.
86 !$acc parallel present (x) pcopy (y) present (a)
87 call saxpy (N
, a
, x
, y
)
! Check the result against the reference.
91 call validate_results (N
, y
, y_ref
)
! Case 4: same computation using unstructured data directives.  enter data
! copies x, a and y to the device; they stay there until the exit data
! directive below.
95 !$acc enter data copyin (x, a, y)
! saxpy is called inside the parallel construct on data that is already
! present on the device.
! NOTE(review): the matching "end parallel" directive is not visible in
! this excerpt.
96 !$acc parallel present (x) pcopy (y) present (a)
97 call saxpy (N
, a
, x
, y
)
! Discard the device copies of x and a; copy y back to the host.
99 !$acc exit data delete (x, a) copyout (y)
! Check the result against the reference.
101 call validate_results (N
, y
, y_ref
)