libgomp/testsuite/libgomp.oacc-c-c++-common/context-3.c

   1 /* { dg-do run { target openacc_nvidia_accel_selected } } */
   2 /* { dg-additional-options "-lcuda -lcublas -lcudart" } */
   3
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cublas_v2.h>
#include <openacc.h>
  10
  11 void
  12 saxpy (int n, float a, float *x, float *y)
  13 {
  14     int i;
  15
  16     for (i = 0; i < n; i++)
  17     {
  18         y[i] = a * x[i] + y[i];
  19     }
  20 }
  21
  22 void
  23 context_check (CUcontext ctx1)
  24 {
  25     CUcontext ctx2, ctx3;
  26     CUresult r;
  27
  28     r = cuCtxGetCurrent (&ctx2);
  29     if (r != CUDA_SUCCESS)
  30     {
  31         fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
  32         exit (EXIT_FAILURE);
  33     }
  34
  35     if (ctx1 != ctx2)
  36     {
  37         fprintf (stderr, "new context established\n");
  38         exit (EXIT_FAILURE);
  39     }
  40
  41     ctx3 = (CUcontext) acc_get_current_cuda_context ();
  42
  43     if (ctx1 != ctx3)
  44     {
  45         fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n");
  46         exit (EXIT_FAILURE);
  47     }
  48
  49     return;
  50 }
  51
  52 int
  53 main (int argc, char **argv)
  54 {
  55     cublasStatus_t s;
  56     cublasHandle_t h;
  57     CUcontext pctx;
  58     CUresult r;
  59     int i;
  60     const int N = 256;
  61     float *h_X, *h_Y1, *h_Y2;
  62     float *d_X,*d_Y;
  63     float alpha = 2.0f;
  64     float error_norm;
  65     float ref_norm;
  66
  67     /* Test 3 - OpenACC creates, cuBLAS shares.  */
  68
  69     acc_set_device_num (0, acc_device_nvidia);
  70
  71     r = cuCtxGetCurrent (&pctx);
  72     if (r != CUDA_SUCCESS)
  73     {
  74         fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
  75         exit (EXIT_FAILURE);
  76     }
  77
  78     h_X = (float *) malloc (N * sizeof (float));
  79     if (h_X == 0)
  80     {
  81         fprintf (stderr, "malloc failed: for h_X\n");
  82         exit (EXIT_FAILURE);
  83     }
  84
  85     h_Y1 = (float *) malloc (N * sizeof (float));
  86     if (h_Y1 == 0)
  87     {
  88         fprintf (stderr, "malloc failed: for h_Y1\n");
  89         exit (EXIT_FAILURE);
  90     }
  91
  92     h_Y2 = (float *) malloc (N * sizeof (float));
  93     if (h_Y2 == 0)
  94     {
  95         fprintf (stderr, "malloc failed: for h_Y2\n");
  96         exit (EXIT_FAILURE);
  97     }
  98
  99     for (i = 0; i < N; i++)
 100     {
 101         h_X[i] = rand () / (float) RAND_MAX;
 102         h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX;
 103     }
 104
 105     d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float));
 106     if (d_X == NULL)
 107     {
 108         fprintf (stderr, "copyin error h_X\n");
 109         exit (EXIT_FAILURE);
 110     }
 111
 112     d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float));
 113     if (d_Y == NULL)
 114     {
 115         fprintf (stderr, "copyin error h_Y1\n");
 116         exit (EXIT_FAILURE);
 117     }
 118
 119     context_check (pctx);
 120
 121     s = cublasCreate (&h);
 122     if (s != CUBLAS_STATUS_SUCCESS)
 123     {
 124         fprintf (stderr, "cublasCreate failed: %d\n", s);
 125         exit (EXIT_FAILURE);
 126     }
 127
 128     context_check (pctx);
 129
 130     s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1);
 131     if (s != CUBLAS_STATUS_SUCCESS)
 132     {
 133         fprintf (stderr, "cublasSaxpy failed: %d\n", s);
 134         exit (EXIT_FAILURE);
 135     }
 136
 137     context_check (pctx);
 138
 139     acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float));
 140
 141     context_check (pctx);
 142
 143     saxpy (N, alpha, h_X, h_Y2);
 144
 145     error_norm = 0;
 146     ref_norm = 0;
 147
 148     for (i = 0; i < N; ++i)
 149     {
 150         float diff;
 151
 152         diff = h_Y1[i] - h_Y2[i];
 153         error_norm += diff * diff;
 154         ref_norm += h_Y2[i] * h_Y2[i];
 155     }
 156
 157     error_norm = (float) sqrt ((double) error_norm);
 158     ref_norm = (float) sqrt ((double) ref_norm);
 159
 160     if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
 161     {
 162         fprintf (stderr, "math error\n");
 163         exit (EXIT_FAILURE);
 164     }
 165
 166     free (h_X);
 167     free (h_Y1);
 168     free (h_Y2);
 169
 170     acc_free (d_X);
 171     acc_free (d_Y);
 172
 173     context_check (pctx);
 174
 175     s = cublasDestroy (h);
 176     if (s != CUBLAS_STATUS_SUCCESS)
 177     {
 178         fprintf (stderr, "cublasDestroy failed: %d\n", s);
 179         exit (EXIT_FAILURE);
 180     }
 181
 182     context_check (pctx);
 183
 184     acc_shutdown (acc_device_nvidia);
 185
 186     r = cuCtxGetCurrent (&pctx);
 187     if (r != CUDA_SUCCESS)
 188     {
 189         fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
 190         exit (EXIT_FAILURE);
 191     }
 192
 193     if (pctx)
 194     {
 195         fprintf (stderr, "Unexpected context\n");
 196         exit (EXIT_FAILURE);
 197     }
 198
 199     return EXIT_SUCCESS;
 200 }