libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c

   1 /* { dg-do run { target openacc_nvidia_accel_selected } } */
   2 /* { dg-additional-options "-lcuda" } */
   3
   4 #include <stdio.h>
   5 #include <stdlib.h>
   6 #include <unistd.h>
   7 #include <openacc.h>
   8 #include <cuda.h>
   9
  10 int
  11 main (int argc, char **argv)
  12 {
  13   CUdevice dev;
  14   CUfunction delay;
  15   CUmodule module;
  16   CUresult r;
  17   const int N = 10;
  18   int i;
  19   CUstream streams[N];
  20   unsigned long *a, *d_a, dticks;
  21   int nbytes;
  22   float dtime;
  23   void *kargs[2];
  24   int clkrate;
  25   int devnum, nprocs;
  26
  27   acc_init (acc_device_nvidia);
  28
  29   devnum = acc_get_device_num (acc_device_nvidia);
  30
  31   r = cuDeviceGet (&dev, devnum);
  32   if (r != CUDA_SUCCESS)
  33     {
  34       fprintf (stderr, "cuDeviceGet failed: %d\n", r);
  35       abort ();
  36     }
  37
  38   r =
  39     cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
  40                           dev);
  41   if (r != CUDA_SUCCESS)
  42     {
  43       fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
  44       abort ();
  45     }
  46
  47   r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
  48   if (r != CUDA_SUCCESS)
  49     {
  50       fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
  51       abort ();
  52     }
  53
  54   r = cuModuleLoad (&module, "subr.ptx");
  55   if (r != CUDA_SUCCESS)
  56     {
  57       fprintf (stderr, "cuModuleLoad failed: %d\n", r);
  58       abort ();
  59     }
  60
  61   r = cuModuleGetFunction (&delay, module, "delay");
  62   if (r != CUDA_SUCCESS)
  63     {
  64       fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
  65       abort ();
  66     }
  67
  68   nbytes = nprocs * sizeof (unsigned long);
  69
  70   dtime = 200.0;
  71
  72   dticks = (unsigned long) (dtime * clkrate);
  73
  74   a = (unsigned long *) malloc (nbytes);
  75   d_a = (unsigned long *) acc_malloc (nbytes);
  76
  77   acc_map_data (a, d_a, nbytes);
  78
  79   kargs[0] = (void *) &d_a;
  80   kargs[1] = (void *) &dticks;
  81
  82   for (i = 0; i < N; i++)
  83     {
  84       streams[i] = (CUstream) acc_get_cuda_stream (i);
  85       if (streams[i] != NULL)
  86         abort ();
  87
  88       r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
  89       if (r != CUDA_SUCCESS)
  90         {
  91           fprintf (stderr, "cuStreamCreate failed: %d\n", r);
  92           abort ();
  93         }
  94
  95         if (!acc_set_cuda_stream (i, streams[i]))
  96           abort ();
  97     }
  98
  99   for (i = 0; i < N; i++)
 100     {
 101       r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
 102       if (r != CUDA_SUCCESS)
 103         {
 104           fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
 105           abort ();
 106         }
 107
 108       if (acc_async_test (i) != 0)
 109         {
 110           fprintf (stderr, "asynchronous operation not running\n");
 111           abort ();
 112         }
 113     }
 114
 115   sleep ((int) (dtime / 1000.0f) + 1);
 116
 117   for (i = 0; i < N; i++)
 118     {
 119       if (acc_async_test (i) != 1)
 120         {
 121           fprintf (stderr, "found asynchronous operation still running\n");
 122           abort ();
 123         }
 124     }
 125
 126   acc_unmap_data (a);
 127
 128   free (a);
 129   acc_free (d_a);
 130
 131   acc_shutdown (acc_device_nvidia);
 132
 133   exit (0);
 134 }
 135
 136 /* { dg-output "" } */