1 /* { dg-do run { target openacc_nvidia_accel_selected } } */
2 /* { dg-additional-options "-lcuda -lcublas -lcudart" } */
3 /* { dg-require-effective-target openacc_cublas } */
4 /* { dg-require-effective-target openacc_cudart } */
9 #include <cuda_runtime_api.h>
10 #include <cublas_v2.h>
/* saxpy: for every index i in [0, n), store a * x[i] + y[i] back into y[i].

   n - number of elements processed by the loop.
   a - scalar that multiplies each x[i].
   x - array that the visible statement only reads.
   y - array that is read and then overwritten in place.

   main() calls this on the malloc'd buffers h_X / h_Y2 to produce the
   reference result that the cublasSaxpy output is compared against.

   NOTE(review): the return type, the braces and the declaration of `i`
   are not visible in this excerpt - confirm against the full file.  */
14 saxpy (int n
, float a
, float *x
, float *y
)
/* Visit elements 0 .. n-1 in increasing order.  */
18 for (i
= 0; i
< n
; i
++)
/* y[i] appears on both sides, so its old value feeds its new value.  */
20 y
[i
] = a
* x
[i
] + y
[i
];
/* context_check: obtain the current CUDA context twice - once through
   cuCtxGetCurrent (into ctx2) and once through
   acc_get_current_cuda_context (into ctx3) - and report problems on stderr.

   ctx1 - the context supplied by the caller; main() always passes the
          context it captured right after acc_set_device_num.

   NOTE(review): the comparisons that guard the "new context established"
   and "wrong value" messages, the declarations of r/ctx2/ctx3, and what
   happens after a message is printed are not visible in this excerpt.
   Presumably ctx2 and ctx3 are each compared with ctx1 - confirm.  */
25 context_check (CUcontext ctx1
)
/* Query the current context via cuCtxGetCurrent; result code goes in r.  */
30 r
= cuCtxGetCurrent (&ctx2
);
/* Any result other than CUDA_SUCCESS is reported with its numeric code.  */
31 if (r
!= CUDA_SUCCESS
)
33 fprintf (stderr
, "cuCtxGetCurrent failed: %d\n", r
);
/* NOTE(review): the condition guarding this message is not visible here.  */
39 fprintf (stderr
, "new context established\n");
/* Fetch the context reported by the OpenACC runtime, cast to CUcontext.  */
43 ctx3
= (CUcontext
) acc_get_current_cuda_context ();
/* NOTE(review): the condition guarding this message is not visible here.  */
47 fprintf (stderr
, "acc_get_current_cuda_context returned wrong value\n");
/* main: drives the scenario named in the "Test 3" comment below.  OpenACC
   is called first (acc_set_device_num), the CUDA context that is current
   afterwards is captured in pctx, and cuBLAS is then used on data that
   OpenACC copied to the device.  context_check (pctx) is called after each
   stage, and the cuBLAS result is compared with the host saxpy() result.

   argc, argv - not referenced by any visible line.

   NOTE(review): this excerpt omits the declarations of N, alpha, d_X, d_Y,
   s, h, r, pctx, diff, error_norm and ref_norm, the guards on several
   fprintf calls, the actions taken after a failure message, and the
   return statement - confirm all of these against the full file.  */
55 main (int argc
, char **argv
)
/* Host buffers: h_X is the input vector, h_Y1 receives the device result,
   h_Y2 receives the host reference result.  */
63 float *h_X
, *h_Y1
, *h_Y2
;
69 /* Test 3 - OpenACC creates, cuBLAS shares. */
/* Select device 0 of type acc_device_nvidia through the OpenACC runtime.
   Per the comment above, this is where OpenACC is expected to create the
   context.  */
71 acc_set_device_num (0, acc_device_nvidia
);
/* Capture the context that is current after the OpenACC call; pctx is the
   value handed to every context_check call below.  */
73 r
= cuCtxGetCurrent (&pctx
);
74 if (r
!= CUDA_SUCCESS
)
76 fprintf (stderr
, "cuCtxGetCurrent failed: %d\n", r
);
/* Allocate N floats for each host buffer.
   NOTE(review): the failure tests guarding the three "malloc failed"
   messages are not visible in this excerpt.  */
80 h_X
= (float *) malloc (N
* sizeof (float));
83 fprintf (stderr
, "malloc failed: for h_X\n");
87 h_Y1
= (float *) malloc (N
* sizeof (float));
90 fprintf (stderr
, "malloc failed: for h_Y1\n");
94 h_Y2
= (float *) malloc (N
* sizeof (float));
97 fprintf (stderr
, "malloc failed: for h_Y2\n");
/* Fill the inputs with rand () / RAND_MAX values.  h_Y1[i] and h_Y2[i] get
   the same value through the chained assignment, so the device path and
   the host reference start from identical data.  */
101 for (i
= 0; i
< N
; i
++)
103 h_X
[i
] = rand () / (float) RAND_MAX
;
104 h_Y2
[i
] = h_Y1
[i
] = rand () / (float) RAND_MAX
;
/* acc_copyin on h_X; the returned pointer is kept in d_X and later passed
   to cublasSaxpy.
   NOTE(review): the guard on the "copyin error" message is not visible.  */
107 d_X
= (float *) acc_copyin (&h_X
[0], N
* sizeof (float));
110 fprintf (stderr
, "copyin error h_X\n");
/* Same for h_Y1, giving d_Y.  h_Y2 is not copied in; it stays host-only.
   NOTE(review): the guard on the "copyin error" message is not visible.  */
114 d_Y
= (float *) acc_copyin (&h_Y1
[0], N
* sizeof (float));
117 fprintf (stderr
, "copyin error h_Y1\n");
/* Check the context after the OpenACC data transfers.  */
121 context_check (pctx
);
/* Create the cuBLAS handle h; a non-success status is reported.  */
123 s
= cublasCreate (&h
);
124 if (s
!= CUBLAS_STATUS_SUCCESS
)
126 fprintf (stderr
, "cublasCreate failed: %d\n", s
);
/* Check the context after cublasCreate.  */
130 context_check (pctx
);
/* cuBLAS counterpart of the host saxpy () call further down: N elements,
   scalar passed by address (&alpha), increment 1 for both d_X and d_Y.  */
132 s
= cublasSaxpy (h
, N
, &alpha
, d_X
, 1, d_Y
, 1);
133 if (s
!= CUBLAS_STATUS_SUCCESS
)
135 fprintf (stderr
, "cublasSaxpy failed: %d\n", s
);
/* Check the context after cublasSaxpy.  */
139 context_check (pctx
);
/* Copy N floats from d_Y back into h_Y1.  */
141 acc_memcpy_from_device (&h_Y1
[0], d_Y
, N
* sizeof (float));
/* Check the context after the copy back.  */
143 context_check (pctx
);
/* Host reference: same operation, same alpha, on h_X and h_Y2.  */
145 saxpy (N
, alpha
, h_X
, h_Y2
);
/* Accumulate the squared differences between the cuBLAS result (h_Y1) and
   the host reference (h_Y2), plus the squared reference values.
   NOTE(review): the initial values of error_norm and ref_norm and the
   declaration of diff are not visible in this excerpt.  */
150 for (i
= 0; i
< N
; ++i
)
154 diff
= h_Y1
[i
] - h_Y2
[i
];
155 error_norm
+= diff
* diff
;
156 ref_norm
+= h_Y2
[i
] * h_Y2
[i
];
/* Square roots turn the two sums of squares into norms.  */
159 error_norm
= (float) sqrt ((double) error_norm
);
160 ref_norm
= (float) sqrt ((double) ref_norm
);
/* Report "math error" when the reference norm is below 1e-7 in magnitude
   or when the relative error error_norm / ref_norm is at least 1e-6f.  */
162 if ((fabs (ref_norm
) < 1e-7) || ((error_norm
/ ref_norm
) >= 1e-6f
))
164 fprintf (stderr
, "math error\n");
/* acc_delete on the same two host ranges that were passed to acc_copyin.  */
168 acc_delete (&h_X
[0], N
* sizeof (float));
169 acc_delete (&h_Y1
[0], N
* sizeof (float));
/* Check the context after the deletes.  */
175 context_check (pctx
);
/* Destroy the cuBLAS handle; a non-success status is reported.  */
177 s
= cublasDestroy (h
);
178 if (s
!= CUBLAS_STATUS_SUCCESS
)
180 fprintf (stderr
, "cublasDestroy failed: %d\n", s
);
/* Final check while the OpenACC runtime is still up.  */
184 context_check (pctx
);
/* Shut down the OpenACC runtime for acc_device_nvidia, then query the
   current context again, overwriting pctx.  */
186 acc_shutdown (acc_device_nvidia
);
188 r
= cuCtxGetCurrent (&pctx
);
189 if (r
!= CUDA_SUCCESS
)
191 fprintf (stderr
, "cuCtxGetCurrent failed: %d\n", r
);
/* NOTE(review): the guard on this message is not visible; presumably it is
   printed when a context is still current after acc_shutdown - confirm.  */
197 fprintf (stderr
, "Unexpected context\n");