1 ! Test host_data interoperability with CUDA blas. This test was
2 ! derived from libgomp.oacc-c-c++-common/host_data-1.c.
4 ! { dg-do run { target openacc_nvidia_accel_selected } }
5 ! { dg-additional-options "-lcublas -Wall -Wextra" }
6 ! { dg-require-effective-target openacc_cublas }
11 integer, parameter :: N
= 10
13 real*4 :: x_ref(N
), y_ref(N
), x(N
), y(N
), a
16 subroutine cublassaxpy(N
, alpha
, x
, incx
, y
, incy
) bind(c
, name
="cublasSaxpy")
18 integer(kind
=c_int
), value
:: N
19 real(kind
=c_float
), value
:: alpha
20 type(*), dimension(*) :: x
21 integer(kind
=c_int
), value
:: incx
22 type(*), dimension(*) :: y
23 integer(kind
=c_int
), value
:: incy
24 end subroutine cublassaxpy
36 call saxpy (N
, a
, x_ref
, y_ref
)
38 !$acc data copyin (x) copy (y)
39 !$acc host_data use_device (x, y)
40 call cublassaxpy(N
, a
, x
, 1, y
, 1)
44 call validate_results (N
, y
, y_ref
)
46 !$acc data create (x) copyout (y)
51 !$acc end parallel loop
53 !$acc host_data use_device (x, y)
54 call cublassaxpy(N
, a
, x
, 1, y
, 1)
58 call validate_results (N
, y
, y_ref
)
62 !$acc data copyin (x) copyin (a) copy (y)
63 !$acc parallel present (x) pcopy (y) present (a)
64 call saxpy (N
, a
, x
, y
)
68 call validate_results (N
, y
, y_ref
)
72 !$acc enter data copyin (x, a, y)
73 !$acc parallel present (x) pcopy (y) present (a)
74 call saxpy (N
, a
, x
, y
)
76 !$acc exit data delete (x, a) copyout (y)
78 call validate_results (N
, y
, y_ref
)
81 subroutine saxpy (nn
, aa
, xx
, yy
)
83 real*4 :: aa
, xx(nn
), yy(nn
)
88 yy(i
) = yy(i
) + aa
* xx(i
)
92 subroutine validate_results (n
, a
, b
)
97 if (abs(a(i
) - b(i
)) > 0.0001) stop 1
99 end subroutine validate_results