libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c

   1 /* { dg-do run { target openacc_nvidia_accel_selected } } */
   2 /* { dg-additional-options "-lcuda -lcublas -lcudart" } */
   3 /* { dg-require-effective-target openacc_cublas } */
   4 /* { dg-require-effective-target openacc_cudart } */
   5
   6 #include <stdio.h>
   7 #include <stdlib.h>
   8 #include <cuda.h>
   9 #include <cuda_runtime_api.h>
  10 #include <cublas_v2.h>
  11 #include <openacc.h>
  12
  13 void
  14 saxpy (int n, float a, float *x, float *y)
  15 {
  16     int i;
  17
  18     for (i = 0; i < n; i++)
  19     {
  20         y[i] = a * x[i] + y[i];
  21     }
  22 }
  23
  24 void
  25 context_check (CUcontext ctx1)
  26 {
  27     CUcontext ctx2, ctx3;
  28     CUresult r;
  29
  30     r = cuCtxGetCurrent (&ctx2);
  31     if (r != CUDA_SUCCESS)
  32     {
  33         fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
  34         exit (EXIT_FAILURE);
  35     }
  36
  37     if (ctx1 != ctx2)
  38     {
  39         fprintf (stderr, "new context established\n");
  40         exit (EXIT_FAILURE);
  41     }
  42
  43     ctx3 = (CUcontext) acc_get_current_cuda_context ();
  44
  45     if (ctx1 != ctx3)
  46     {
  47         fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n");
  48         exit (EXIT_FAILURE);
  49     }
  50
  51     return;
  52 }
  53
  54 int
  55 main (int argc, char **argv)
  56 {
  57     cublasStatus_t s;
  58     cublasHandle_t h;
  59     CUcontext pctx;
  60     CUresult r;
  61     int i;
  62     const int N = 256;
  63     float *h_X, *h_Y1, *h_Y2;
  64     float *d_X,*d_Y;
  65     float alpha = 2.0f;
  66     float error_norm;
  67     float ref_norm;
  68
  69     /* Test 4 - OpenACC creates, cuBLAS shares.  */
  70
  71     acc_set_device_num (0, acc_device_nvidia);
  72
  73     r = cuCtxGetCurrent (&pctx);
  74     if (r != CUDA_SUCCESS)
  75     {
  76         fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
  77         exit (EXIT_FAILURE);
  78     }
  79
  80     h_X = (float *) malloc (N * sizeof (float));
  81     if (h_X == 0)
  82     {
  83         fprintf (stderr, "malloc failed: for h_X\n");
  84         exit (EXIT_FAILURE);
  85     }
  86
  87     h_Y1 = (float *) malloc (N * sizeof (float));
  88     if (h_Y1 == 0)
  89     {
  90         fprintf (stderr, "malloc failed: for h_Y1\n");
  91         exit (EXIT_FAILURE);
  92     }
  93
  94     h_Y2 = (float *) malloc (N * sizeof (float));
  95     if (h_Y2 == 0)
  96     {
  97         fprintf (stderr, "malloc failed: for h_Y2\n");
  98         exit (EXIT_FAILURE);
  99     }
 100
 101     for (i = 0; i < N; i++)
 102     {
 103         h_X[i] = rand () / (float) RAND_MAX;
 104         h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX;
 105     }
 106
 107 #pragma acc parallel copyin (h_X[0:N]), copy (h_Y2[0:N]) copy (alpha)
 108     {
 109         int i;
 110
 111         for (i = 0; i < N; i++)
 112         {
 113             h_Y2[i] = alpha * h_X[i] + h_Y2[i];
 114         }
 115     }
 116
 117     r = cuCtxGetCurrent (&pctx);
 118     if (r != CUDA_SUCCESS)
 119     {
 120         fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
 121         exit (EXIT_FAILURE);
 122     }
 123
 124     d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float));
 125     if (d_X == NULL)
 126     {
 127         fprintf (stderr, "copyin error h_Y1\n");
 128         exit (EXIT_FAILURE);
 129     }
 130
 131     d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float));
 132     if (d_Y == NULL)
 133     {
 134         fprintf (stderr, "copyin error h_Y1\n");
 135         exit (EXIT_FAILURE);
 136     }
 137
 138     s = cublasCreate (&h);
 139     if (s != CUBLAS_STATUS_SUCCESS)
 140     {
 141         fprintf (stderr, "cublasCreate failed: %d\n", s);
 142         exit (EXIT_FAILURE);
 143     }
 144
 145     context_check (pctx);
 146
 147     s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1);
 148     if (s != CUBLAS_STATUS_SUCCESS)
 149     {
 150         fprintf (stderr, "cublasSaxpy failed: %d\n", s);
 151         exit (EXIT_FAILURE);
 152     }
 153
 154     context_check (pctx);
 155
 156     acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float));
 157
 158     context_check (pctx);
 159
 160     error_norm = 0;
 161     ref_norm = 0;
 162
 163     for (i = 0; i < N; ++i)
 164     {
 165         float diff;
 166
 167         diff = h_Y1[i] - h_Y2[i];
 168         error_norm += diff * diff;
 169         ref_norm += h_Y2[i] * h_Y2[i];
 170     }
 171
 172     error_norm = (float) sqrt ((double) error_norm);
 173     ref_norm = (float) sqrt ((double) ref_norm);
 174
 175     if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
 176     {
 177         fprintf (stderr, "math error\n");
 178         exit (EXIT_FAILURE);
 179     }
 180
 181     acc_delete (&h_X[0], N * sizeof (float));
 182     acc_delete (&h_Y1[0], N * sizeof (float));
 183
 184     free (h_X);
 185     free (h_Y1);
 186     free (h_Y2);
 187
 188     context_check (pctx);
 189
 190     s = cublasDestroy (h);
 191     if (s != CUBLAS_STATUS_SUCCESS)
 192     {
 193         fprintf (stderr, "cublasDestroy failed: %d\n", s);
 194         exit (EXIT_FAILURE);
 195     }
 196
 197     context_check (pctx);
 198
 199     acc_shutdown (acc_device_nvidia);
 200
 201     r = cuCtxGetCurrent (&pctx);
 202     if (r != CUDA_SUCCESS)
 203     {
 204         fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
 205         exit (EXIT_FAILURE);
 206     }
 207
 208     if (pctx)
 209     {
 210         fprintf (stderr, "Unexpected context\n");
 211         exit (EXIT_FAILURE);
 212     }
 213
 214     return EXIT_SUCCESS;
 215 }