1 ! Fixed-mode host_data interaction with CUDA BLAS.
3 ! { dg-do run { target openacc_nvidia_accel_selected } }
4 ! { dg-additional-options "-lcublas -Wall -Wextra" }
5 ! { dg-require-effective-target openacc_cublas }
7 include
"cublas-fixed.h"
9 integer, parameter :: N
= 10
11 real*4
:: x_ref
(N
), y_ref
(N
), x
(N
), y
(N
), a
22 call saxpy
(N
, a
, x_ref
, y_ref
)
24 !$acc data copyin
(x
) copy
(y
)
25 !$acc host_data use_device
(x
, y
)
26 call cublassaxpy
(N
, a
, x
, 1, y
, 1)
30 call validate_results
(N
, y
, y_ref
)
32 !$acc data create
(x
) copyout
(y
)
37 !$acc
end parallel loop
39 !$acc host_data use_device
(x
, y
)
40 call cublassaxpy
(N
, a
, x
, 1, y
, 1)
44 call validate_results
(N
, y
, y_ref
)
48 !$acc data copyin
(x
) copyin
(a
) copy
(y
)
49 !$acc parallel present
(x
) pcopy
(y
) present
(a
)
50 call saxpy
(N
, a
, x
, y
)
54 call validate_results
(N
, y
, y_ref
)
58 !$acc enter data copyin
(x
, a
, y
)
59 !$acc parallel present
(x
) pcopy
(y
) present
(a
)
60 call saxpy
(N
, a
, x
, y
)
62 !$acc exit data delete
(x
, a
) copyout
(y
)
64 call validate_results
(N
, y
, y_ref
)
67 subroutine saxpy
(nn
, aa
, xx
, yy
)
69 real*4
:: aa
, xx
(nn
), yy
(nn
)
74 yy
(i
) = yy
(i
) + aa
* xx
(i
)
78 subroutine validate_results
(n
, a
, b
)
83 if (abs
(a
(i
) - b
(i
)) > 0.0001) stop 1
85 end subroutine validate_results