Daily bump.
[official-gcc.git] / libgomp / testsuite / libgomp.oacc-c-c++-common / lib-81.c
blob958672c45e5f3db8d7678b8df20f277bdb4fdb91
/* { dg-do run { target openacc_nvidia_accel_selected } } */
/* { dg-additional-options "-lcuda" } */
/* { dg-require-effective-target openacc_cuda } */
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <unistd.h>
8 #include <openacc.h>
9 #include <cuda.h>
10 #include "timer.h"
12 int
13 main (int argc, char **argv)
15 CUdevice dev;
16 CUfunction delay;
17 CUmodule module;
18 CUresult r;
19 int N;
20 int i;
21 CUstream *streams, stream;
22 unsigned long *a, *d_a, dticks;
23 int nbytes;
24 float atime, dtime;
25 void *kargs[2];
26 int clkrate;
27 int devnum, nprocs;
29 acc_init (acc_device_nvidia);
31 devnum = acc_get_device_num (acc_device_nvidia);
33 r = cuDeviceGet (&dev, devnum);
34 if (r != CUDA_SUCCESS)
36 fprintf (stderr, "cuDeviceGet failed: %d\n", r);
37 abort ();
40 r =
41 cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
42 dev);
43 if (r != CUDA_SUCCESS)
45 fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
46 abort ();
49 r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
50 if (r != CUDA_SUCCESS)
52 fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
53 abort ();
56 r = cuModuleLoad (&module, "subr.ptx");
57 if (r != CUDA_SUCCESS)
59 fprintf (stderr, "cuModuleLoad failed: %d\n", r);
60 abort ();
63 r = cuModuleGetFunction (&delay, module, "delay");
64 if (r != CUDA_SUCCESS)
66 fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
67 abort ();
70 nbytes = nprocs * sizeof (unsigned long);
72 dtime = 500.0;
74 dticks = (unsigned long) (dtime * clkrate);
76 N = nprocs;
78 a = (unsigned long *) malloc (nbytes);
79 d_a = (unsigned long *) acc_malloc (nbytes);
81 acc_map_data (a, d_a, nbytes);
83 streams = (CUstream *) malloc (N * sizeof (void *));
85 for (i = 0; i < N; i++)
87 streams[i] = (CUstream) acc_get_cuda_stream (i);
88 if (streams[i] != NULL)
89 abort ();
91 r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
92 if (r != CUDA_SUCCESS)
94 fprintf (stderr, "cuStreamCreate failed: %d\n", r);
95 abort ();
98 if (!acc_set_cuda_stream (i, streams[i]))
99 abort ();
102 init_timers (1);
104 kargs[0] = (void *) &d_a;
105 kargs[1] = (void *) &dticks;
107 stream = (CUstream) acc_get_cuda_stream (N);
108 if (stream != NULL)
109 abort ();
111 r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
112 if (r != CUDA_SUCCESS)
114 fprintf (stderr, "cuStreamCreate failed: %d\n", r);
115 abort ();
118 if (!acc_set_cuda_stream (N, stream))
119 abort ();
121 start_timer (0);
123 for (i = 0; i < N; i++)
125 r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
126 if (r != CUDA_SUCCESS)
128 fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
129 abort ();
133 acc_wait_all_async (N);
135 for (i = 0; i <= N; i++)
137 if (acc_async_test (i) != 0)
138 abort ();
141 acc_wait (N);
143 for (i = 0; i <= N; i++)
145 if (acc_async_test (i) != 1)
146 abort ();
149 atime = stop_timer (0);
151 if (atime < dtime)
153 fprintf (stderr, "actual time < delay time\n");
154 abort ();
157 start_timer (0);
159 stream = (CUstream) acc_get_cuda_stream (N + 1);
160 if (stream != NULL)
161 abort ();
163 r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
164 if (r != CUDA_SUCCESS)
166 fprintf (stderr, "cuStreamCreate failed: %d\n", r);
167 abort ();
170 if (!acc_set_cuda_stream (N + 1, stream))
171 abort ();
173 acc_wait_all_async (N + 1);
175 acc_wait (N + 1);
177 atime = stop_timer (0);
179 if (0.10 < atime)
181 fprintf (stderr, "actual time too long\n");
182 abort ();
185 start_timer (0);
187 acc_wait_all_async (N);
189 acc_wait (N);
191 atime = stop_timer (0);
193 if (0.10 < atime)
195 fprintf (stderr, "actual time too long\n");
196 abort ();
199 acc_unmap_data (a);
201 fini_timers ();
203 free (streams);
204 free (a);
205 acc_free (d_a);
207 acc_shutdown (acc_device_nvidia);
209 exit (0);
/* { dg-output "" } */