1 ! Fixed-mode host_data interaction with CUDA BLAS.
3 ! { dg-do run { target openacc_nvidia_accel_selected } }
4 ! { dg-additional-options "-lcublas -Wall -Wextra" }
! Main test driver. NOTE(review): this excerpt is incomplete -- the embedded
! original line numbers jump, so the initialisation of a, x and y, the loop
! bodies and the closing "end ..." directives are not visible here.
! The included header presumably declares the interface used by the
! cublassaxpy calls below -- TODO confirm against cublas-fixed.h.
6 include
"cublas-fixed.h"
! Extent of every array declared below and the count passed to each call.
8 integer, parameter :: N
= 10
! real*4 work data: x_ref/y_ref feed the reference saxpy call made before any
! data directive; x/y are the arrays named in the OpenACC clauses; a is the
! scalar multiplier.
10 real*4
:: x_ref
(N
), y_ref
(N
), x
(N
), y
(N
), a
! Reference result: saxpy updates y_ref in place (yy = yy + aa * xx), and
! every validate_results call below compares against it.
21 call saxpy
(N
, a
, x_ref
, y_ref
)
! Case 1: structured data region with x as copyin and y as copy.
23 !$acc data copyin
(x
) copy
(y
)
! host_data use_device makes x and y refer to their device copies inside the
! construct, so the library call below receives device addresses.
24 !$acc host_data use_device
(x
, y
)
25 call cublassaxpy
(N
, a
, x
, 1, y
, 1)
! Check y against the reference computed above.
29 call validate_results
(N
, y
, y_ref
)
! Case 2: x is only created on the device and y is copyout-only. The
! "end parallel loop" below closes a loop that is not visible in this excerpt.
31 !$acc data create
(x
) copyout
(y
)
36 !$acc
end parallel loop
! Same library call as case 1, again issued through host_data use_device.
38 !$acc host_data use_device
(x
, y
)
39 call cublassaxpy
(N
, a
, x
, 1, y
, 1)
43 call validate_results
(N
, y
, y_ref
)
! Case 3: x, a and y are mapped by a structured data region; the parallel
! construct then requires x and a to be present and uses pcopy for y.
47 !$acc data copyin
(x
) copyin
(a
) copy
(y
)
48 !$acc parallel present
(x
) pcopy
(y
) present
(a
)
! This call follows the parallel directive; the matching end directive is not
! visible -- presumably saxpy runs inside the compute region, TODO confirm.
49 call saxpy
(N
, a
, x
, y
)
53 call validate_results
(N
, y
, y_ref
)
! Case 4: same computation as case 3, but the mapping is set up with
! unstructured enter data / exit data directives instead of a data region.
57 !$acc enter data copyin
(x
, a
, y
)
58 !$acc parallel present
(x
) pcopy
(y
) present
(a
)
59 call saxpy
(N
, a
, x
, y
)
! Release x and a without copying them back; only y is copied out.
61 !$acc exit data delete
(x
, a
) copyout
(y
)
63 call validate_results
(N
, y
, y_ref
)
! saxpy: updates yy in place with yy(i) = yy(i) + aa * xx(i).
!   nn - declared extent of xx and yy
!   aa - real*4 scalar multiplier
!   xx - real*4 array that is read
!   yy - real*4 array that is read and overwritten
! NOTE(review): the declarations of nn and i, the loop header and the end of
! the subroutine are not visible in this excerpt, so the iteration range is
! presumably 1..nn -- TODO confirm.
66 subroutine saxpy
(nn
, aa
, xx
, yy
)
68 real*4
:: aa
, xx
(nn
), yy
(nn
)
! Element-wise update; i is the index of a loop that this excerpt omits.
73 yy
(i
) = yy
(i
) + aa
* xx
(i
)
! validate_results: aborts the program when an element of a differs from the
! matching element of b by more than 0.0001 in absolute value.
!   n - first argument; its use is not visible in this excerpt (presumably
!       the number of elements to compare -- TODO confirm)
!   a - array under test
!   b - reference array
! NOTE(review): the declarations and the loop header that drives i are not
! visible here.
77 subroutine validate_results
(n
, a
, b
)
! Absolute tolerance check; call abort ends the run on the first mismatch.
82 if (abs
(a
(i
) - b
(i
)) > 0.0001) call abort
84 end subroutine validate_results