/* PR target/82855
   [official-gcc.git] / libgomp / testsuite / libgomp.oacc-c-c++-common / host_data-1.c
   blob 51745ba726d760b6d925a3b4b520c7a685785581  */
/* { dg-do run { target openacc_nvidia_accel_selected } } */
/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
#include <stdlib.h>
#include <openacc.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cublas_v2.h>
/* Host reference SAXPY: for every I in [0, N) store Y[I] + A * X[I] back
   into Y[I].  X is only read; Y is updated in place.  Nothing is done
   when N is zero or negative.  */
void
saxpy_host (int n, float a, float *x, float *y)
{
  int i;

  for (i = 0; i < n; i++)
    y[i] = y[i] + a * x[i];
}
/* SAXPY with the same arithmetic as saxpy_host: for every I in [0, N)
   store Y[I] + A * X[I] back into Y[I].  The "acc routine" directive
   marks the function so that it can be called from OpenACC compute
   regions; the directive must stay directly in front of the definition.  */
#pragma acc routine
void
saxpy_target (int n, float a, float *x, float *y)
{
  int i;

  for (i = 0; i < n; i++)
    y[i] = y[i] + a * x[i];
}
29 int
30 main(int argc, char **argv)
32 #define N 8
33 int i;
34 float x_ref[N], y_ref[N];
35 float x[N], y[N];
36 cublasHandle_t h;
37 float a = 2.0;
39 for (i = 0; i < N; i++)
41 x[i] = x_ref[i] = 4.0 + i;
42 y[i] = y_ref[i] = 3.0;
45 saxpy_host (N, a, x_ref, y_ref);
47 cublasCreate (&h);
49 #pragma acc data copyin (x[0:N]) copy (y[0:N])
51 #pragma acc host_data use_device (x, y)
53 cublasSaxpy (h, N, &a, x, 1, y, 1);
57 for (i = 0; i < N; i++)
59 if (y[i] != y_ref[i])
60 abort ();
63 #pragma acc data create (x[0:N]) copyout (y[0:N])
65 #pragma acc kernels
66 for (i = 0; i < N; i++)
67 y[i] = 3.0;
69 #pragma acc host_data use_device (x, y)
71 cublasSaxpy (h, N, &a, x, 1, y, 1);
75 cublasDestroy (h);
77 for (i = 0; i < N; i++)
79 if (y[i] != y_ref[i])
80 abort ();
83 for (i = 0; i < N; i++)
84 y[i] = 3.0;
86 /* There's no need to use host_data here. */
87 #pragma acc data copyin (x[0:N]) copyin (a) copy (y[0:N])
89 #pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a)
90 saxpy_target (N, a, x, y);
93 for (i = 0; i < N; i++)
95 if (y[i] != y_ref[i])
96 abort ();
99 return 0;