/* { dg-do run { target openacc_nvidia_accel_selected } } */
/* { dg-additional-options "-lcuda -lcublas -lcudart" } */

#include <stdlib.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cublas_v2.h>
/* Host reference implementation of SAXPY: y[i] += a * x[i].

   n: number of elements; a: scalar multiplier;
   x: input vector (read-only); y: updated in place.
   Used to compute the expected results that the accelerator
   variants are checked against.  */
void
saxpy_host (int n, float a, float *x, float *y)
{
  int i;

  for (i = 0; i < n; i++)
    y[i] = y[i] + a * x[i];
}
/* Accelerator variant of SAXPY: y[i] += a * x[i].

   Marked "#pragma acc routine" because it is called from inside an
   "#pragma acc parallel" region in main (a routine directive is
   required for a function to be compiled for the device).
   Semantics are identical to saxpy_host.  */
#pragma acc routine
void
saxpy_target (int n, float a, float *x, float *y)
{
  int i;

  for (i = 0; i < n; i++)
    y[i] = y[i] + a * x[i];
}
30 main(int argc
, char **argv
)
34 float x_ref
[N
], y_ref
[N
];
39 for (i
= 0; i
< N
; i
++)
41 x
[i
] = x_ref
[i
] = 4.0 + i
;
42 y
[i
] = y_ref
[i
] = 3.0;
45 saxpy_host (N
, a
, x_ref
, y_ref
);
49 #pragma acc data copyin (x[0:N]) copy (y[0:N])
51 #pragma acc host_data use_device (x, y)
53 cublasSaxpy (h
, N
, &a
, x
, 1, y
, 1);
57 for (i
= 0; i
< N
; i
++)
63 #pragma acc data create (x[0:N]) copyout (y[0:N])
66 for (i
= 0; i
< N
; i
++)
69 #pragma acc host_data use_device (x, y)
71 cublasSaxpy (h
, N
, &a
, x
, 1, y
, 1);
77 for (i
= 0; i
< N
; i
++)
83 for (i
= 0; i
< N
; i
++)
86 /* There's no need to use host_data here. */
87 #pragma acc data copyin (x[0:N]) copyin (a) copy (y[0:N])
89 #pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a)
90 saxpy_target (N
, a
, x
, y
);
93 for (i
= 0; i
< N
; i
++)