1 /* { dg-do run { target openacc_nvidia_accel_selected } } */
2 /* { dg-additional-options "-lcuda" } */
12 main (int argc
, char **argv
)
21 unsigned long *a
, *d_a
, dticks
;
23 float atime
, dtime
, hitime
, lotime
;
30 acc_init (acc_device_nvidia
);
32 devnum
= acc_get_device_num (acc_device_nvidia
);
34 r
= cuDeviceGet (&dev
, devnum
);
35 if (r
!= CUDA_SUCCESS
)
37 fprintf (stderr
, "cuDeviceGet failed: %d\n", r
);
42 cuDeviceGetAttribute (&nprocs
, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT
,
44 if (r
!= CUDA_SUCCESS
)
46 fprintf (stderr
, "cuDeviceGetAttribute failed: %d\n", r
);
50 r
= cuDeviceGetAttribute (&clkrate
, CU_DEVICE_ATTRIBUTE_CLOCK_RATE
, dev
);
51 if (r
!= CUDA_SUCCESS
)
53 fprintf (stderr
, "cuDeviceGetAttribute failed: %d\n", r
);
57 r
= cuModuleLoad (&module
, "subr.ptx");
58 if (r
!= CUDA_SUCCESS
)
60 fprintf (stderr
, "cuModuleLoad failed: %d\n", r
);
64 r
= cuModuleGetFunction (&delay
, module
, "delay");
65 if (r
!= CUDA_SUCCESS
)
67 fprintf (stderr
, "cuModuleGetFunction failed: %d\n", r
);
71 nbytes
= nprocs
* sizeof (unsigned long);
75 dticks
= (unsigned long) (dtime
* clkrate
);
79 a
= (unsigned long *) malloc (nbytes
);
80 d_a
= (unsigned long *) acc_malloc (nbytes
);
82 acc_map_data (a
, d_a
, nbytes
);
84 r
= cuStreamCreate (&stream
, CU_STREAM_DEFAULT
);
85 if (r
!= CUDA_SUCCESS
)
87 fprintf (stderr
, "cuStreamCreate failed: %d\n", r
);
91 if (!acc_set_cuda_stream (1, stream
))
94 stream
= (CUstream
) acc_get_cuda_stream (0);
98 r
= cuStreamCreate (&stream
, CU_STREAM_DEFAULT
);
99 if (r
!= CUDA_SUCCESS
)
101 fprintf (stderr
, "cuStreamCreate failed: %d\n", r
);
105 if (!acc_set_cuda_stream (0, stream
))
110 kargs
[0] = (void *) &d_a
;
111 kargs
[1] = (void *) &dticks
;
115 for (i
= 0; i
< N
; i
++)
117 r
= cuLaunchKernel (delay
, 1, 1, 1, 1, 1, 1, 0, stream
, kargs
, 0);
118 if (r
!= CUDA_SUCCESS
)
120 fprintf (stderr
, "cuLaunchKernel failed: %d\n", r
);
125 if (acc_async_test (0) != 0)
128 /* Test unseen async-argument. */
129 if (acc_async_test (1) != 1)
132 acc_wait_async (0, 1);
134 if (acc_async_test (0) != 0)
137 if (acc_async_test (1) != 0)
140 /* Test unseen async-argument. */
142 if (acc_async_test (2) != 1)
145 acc_wait_async (2, 1);
147 if (acc_async_test (0) != 0)
150 if (acc_async_test (1) != 0)
153 if (acc_async_test (2) != 1)
159 atime
= stop_timer (0);
161 if (acc_async_test (0) != 1)
164 if (acc_async_test (1) != 1)
168 hitime
+= hitime
* 0.02;
171 lotime
-= lotime
* 0.02;
173 if (atime
> hitime
|| atime
< lotime
)
175 fprintf (stderr
, "actual time < delay time\n");
186 acc_shutdown (acc_device_nvidia
);
191 /* { dg-output "" } */