libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c

   1 /* { dg-do run { target openacc_nvidia_accel_selected } } */
   2 /* { dg-additional-options "-lcuda -lcublas -lcudart" } */
   3
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cublas_v2.h>
#include <openacc.h>
  10
  11 void
  12 saxpy (int n, float a, float *x, float *y)
  13 {
  14     int i;
  15
  16     for (i = 0; i < n; i++)
  17     {
  18         y[i] = a * x[i] + y[i];
  19     }
  20 }
  21
  22 void
  23 context_check (CUcontext ctx1)
  24 {
  25     CUcontext ctx2, ctx3;
  26     CUresult r;
  27
  28     r = cuCtxGetCurrent (&ctx2);
  29     if (r != CUDA_SUCCESS)
  30     {
  31         fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
  32         exit (EXIT_FAILURE);
  33     }
  34
  35     if (ctx1 != ctx2)
  36     {
  37         fprintf (stderr, "new context established\n");
  38         exit (EXIT_FAILURE);
  39     }
  40
  41     ctx3 = (CUcontext) acc_get_current_cuda_context ();
  42
  43     if (ctx1 != ctx3)
  44     {
  45         fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n");
  46         exit (EXIT_FAILURE);
  47     }
  48
  49     return;
  50 }
  51
  52 int
  53 main (int argc, char **argv)
  54 {
  55     cublasStatus_t s;
  56     cudaError_t e;
  57     cublasHandle_t h;
  58     CUcontext pctx, ctx;
  59     CUresult r;
  60     int dev;
  61     int i;
  62     const int N = 256;
  63     float *h_X, *h_Y1, *h_Y2;
  64     float *d_X,*d_Y;
  65     float alpha = 2.0f;
  66     float error_norm;
  67     float ref_norm;
  68
  69     /* Test 2 - cuBLAS creates, OpenACC shares.  */
  70
  71     s = cublasCreate (&h);
  72     if (s != CUBLAS_STATUS_SUCCESS)
  73     {
  74         fprintf (stderr, "cublasCreate failed: %d\n", s);
  75         exit (EXIT_FAILURE);
  76     }
  77
  78     r = cuCtxGetCurrent (&pctx);
  79     if (r != CUDA_SUCCESS)
  80     {
  81         fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
  82         exit (EXIT_FAILURE);
  83     }
  84
  85     e = cudaGetDevice (&dev);
  86     if (e != cudaSuccess)
  87     {
  88         fprintf (stderr, "cudaGetDevice failed: %d\n", e);
  89         exit (EXIT_FAILURE);
  90     }
  91
  92     acc_set_device_num (dev, acc_device_nvidia);
  93
  94     h_X = (float *) malloc (N * sizeof (float));
  95     if (h_X == 0)
  96     {
  97         fprintf (stderr, "malloc failed: for h_X\n");
  98         exit (EXIT_FAILURE);
  99     }
 100
 101     h_Y1 = (float *) malloc (N * sizeof (float));
 102     if (h_Y1 == 0)
 103     {
 104         fprintf (stderr, "malloc failed: for h_Y1\n");
 105         exit (EXIT_FAILURE);
 106     }
 107
 108     h_Y2 = (float *) malloc (N * sizeof (float));
 109     if (h_Y2 == 0)
 110     {
 111         fprintf (stderr, "malloc failed: for h_Y2\n");
 112         exit (EXIT_FAILURE);
 113     }
 114
 115     for (i = 0; i < N; i++)
 116     {
 117         h_X[i] = rand () / (float) RAND_MAX;
 118         h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX;
 119     }
 120
 121     d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float));
 122     if (d_X == NULL)
 123     {
 124         fprintf (stderr, "copyin error h_X\n");
 125         exit (EXIT_FAILURE);
 126     }
 127
 128     context_check (pctx);
 129
 130     d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float));
 131     if (d_Y == NULL)
 132     {
 133         fprintf (stderr, "copyin error h_Y1\n");
 134         exit (EXIT_FAILURE);
 135     }
 136
 137     context_check (pctx);
 138
 139     s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1);
 140     if (s != CUBLAS_STATUS_SUCCESS)
 141     {
 142         fprintf (stderr, "cublasSaxpy failed: %d\n", s);
 143         exit (EXIT_FAILURE);
 144     }
 145
 146     context_check (pctx);
 147
 148     acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float));
 149
 150     context_check (pctx);
 151
 152 #pragma acc parallel copyin (h_X[0:N]), copy (h_Y2[0:N]) copyin (alpha)
 153     {
 154         int i;
 155
 156         for (i = 0; i < N; i++)
 157         {
 158             h_Y2[i] = alpha * h_X[i] + h_Y2[i];
 159         }
 160     }
 161
 162     context_check (pctx);
 163
 164     error_norm = 0;
 165     ref_norm = 0;
 166
 167     for (i = 0; i < N; ++i)
 168     {
 169         float diff;
 170
 171         diff = h_Y1[i] - h_Y2[i];
 172         error_norm += diff * diff;
 173         ref_norm += h_Y2[i] * h_Y2[i];
 174     }
 175
 176     error_norm = (float) sqrt ((double) error_norm);
 177     ref_norm = (float) sqrt ((double) ref_norm);
 178
 179     if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
 180     {
 181         fprintf (stderr, "math error\n");
 182         exit (EXIT_FAILURE);
 183     }
 184
 185     free (h_X);
 186     free (h_Y1);
 187     free (h_Y2);
 188
 189     acc_free (d_X);
 190     acc_free (d_Y);
 191
 192     context_check (pctx);
 193
 194     s = cublasDestroy (h);
 195     if (s != CUBLAS_STATUS_SUCCESS)
 196     {
 197         fprintf (stderr, "cublasDestroy failed: %d\n", s);
 198         exit (EXIT_FAILURE);
 199     }
 200
 201     acc_shutdown (acc_device_nvidia);
 202
 203     r = cuCtxGetCurrent (&ctx);
 204     if (r != CUDA_SUCCESS)
 205     {
 206         fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
 207         exit (EXIT_FAILURE);
 208     }
 209
 210     if (!ctx)
 211     {
 212         fprintf (stderr, "Expected context\n");
 213         exit (EXIT_FAILURE);
 214     }
 215
 216     if (pctx != ctx)
 217     {
 218         fprintf (stderr, "Unexpected new context\n");
 219         exit (EXIT_FAILURE);
 220     }
 221
 222     return EXIT_SUCCESS;
 223 }