1 ! Test host_data interoperability with CUDA blas. This test was
2 ! derived from libgomp.oacc-c-c++-common/host_data-1.c.
4 ! { dg-do run { target openacc_nvidia_accel_selected } }
5 ! { dg-additional-options "-lcublas -Wall -Wextra" }
10 integer, parameter :: N
= 10
12 real*4 :: x_ref(N
), y_ref(N
), x(N
), y(N
), a
15 subroutine cublassaxpy(N
, alpha
, x
, incx
, y
, incy
) bind(c
, name
="cublasSaxpy")
17 integer(kind
=c_int
), value
:: N
18 real(kind
=c_float
), value
:: alpha
19 type(*), dimension(*) :: x
20 integer(kind
=c_int
), value
:: incx
21 type(*), dimension(*) :: y
22 integer(kind
=c_int
), value
:: incy
23 end subroutine cublassaxpy
35 call saxpy (N
, a
, x_ref
, y_ref
)
37 !$acc data copyin (x) copy (y)
38 !$acc host_data use_device (x, y)
39 call cublassaxpy(N
, a
, x
, 1, y
, 1)
43 call validate_results (N
, y
, y_ref
)
45 !$acc data create (x) copyout (y)
50 !$acc end parallel loop
52 !$acc host_data use_device (x, y)
53 call cublassaxpy(N
, a
, x
, 1, y
, 1)
57 call validate_results (N
, y
, y_ref
)
61 !$acc data copyin (x) copyin (a) copy (y)
62 !$acc parallel present (x) pcopy (y) present (a)
63 call saxpy (N
, a
, x
, y
)
67 call validate_results (N
, y
, y_ref
)
71 !$acc enter data copyin (x, a, y)
72 !$acc parallel present (x) pcopy (y) present (a)
73 call saxpy (N
, a
, x
, y
)
75 !$acc exit data delete (x, a) copyout (y)
77 call validate_results (N
, y
, y_ref
)
80 subroutine saxpy (nn
, aa
, xx
, yy
)
82 real*4 :: aa
, xx(nn
), yy(nn
)
87 yy(i
) = yy(i
) + aa
* xx(i
)
91 subroutine validate_results (n
, a
, b
)
96 if (abs(a(i
) - b(i
)) > 0.0001) call abort
98 end subroutine validate_results