1 /* { dg-do run { target openacc_nvidia_accel_selected } } */
2 /* { dg-additional-options "-lcuda" } */
3 /* { dg-require-effective-target openacc_cuda } */
/* Test driver: loads a "delay" kernel from "subr.ptx" with the CUDA driver
   API, launches it on a CUDA stream that has been handed to the OpenACC
   runtime via acc_set_cuda_stream, and checks with acc_async_test_all that
   the work is first reported as pending and, after sleeping, as finished.
   NOTE(review): this listing is partial -- braces, most declarations and
   the action taken after each diagnostic are not visible here.  */
12 main (int argc
, char **argv
)
19 unsigned long *a
, *d_a
, dticks
;
/* Initialize the OpenACC runtime for the NVIDIA device type.  */
26 acc_init (acc_device_nvidia
);
/* Ask OpenACC for its current device number and use that number to obtain
   the CUDA driver device handle.  */
28 devnum
= acc_get_device_num (acc_device_nvidia
);
30 r
= cuDeviceGet (&dev
, devnum
);
31 if (r
!= CUDA_SUCCESS
)
33 fprintf (stderr
, "cuDeviceGet failed: %d\n", r
);
/* Query the multiprocessor count into nprocs.
   NOTE(review): this call is cut off in the visible lines (the remaining
   arguments and the assignment to r are not shown), yet r is tested right
   after it -- confirm against the full file.  */
38 cuDeviceGetAttribute (&nprocs
, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT
,
40 if (r
!= CUDA_SUCCESS
)
42 fprintf (stderr
, "cuDeviceGetAttribute failed: %d\n", r
);
/* Query the device clock rate into clkrate; it is used below to turn dtime
   into a tick count.  */
46 r
= cuDeviceGetAttribute (&clkrate
, CU_DEVICE_ATTRIBUTE_CLOCK_RATE
, dev
);
47 if (r
!= CUDA_SUCCESS
)
49 fprintf (stderr
, "cuDeviceGetAttribute failed: %d\n", r
);
/* Load the module "subr.ptx" and look up its "delay" entry point.  */
53 r
= cuModuleLoad (&module
, "subr.ptx");
54 if (r
!= CUDA_SUCCESS
)
56 fprintf (stderr
, "cuModuleLoad failed: %d\n", r
);
60 r
= cuModuleGetFunction (&delay
, module
, "delay");
61 if (r
!= CUDA_SUCCESS
)
63 fprintf (stderr
, "cuModuleGetFunction failed: %d\n", r
);
/* Buffer size: one unsigned long per multiprocessor reported above.  */
67 nbytes
= nprocs
* sizeof (unsigned long);
/* Delay length in ticks, computed as dtime * clkrate.
   NOTE(review): the assignment of dtime is not visible here; the sleep
   further down divides dtime by 1000.f, so it is presumably expressed in
   milliseconds -- confirm.  */
71 dticks
= (unsigned long) (dtime
* clkrate
);
/* Allocate a host buffer and a device buffer of nbytes each and register
   the pair with the OpenACC runtime.
   NOTE(review): neither allocation result is checked in the visible
   lines.  */
73 a
= (unsigned long *) malloc (nbytes
);
74 d_a
= (unsigned long *) acc_malloc (nbytes
);
76 acc_map_data (a
, d_a
, nbytes
);
/* Kernel argument array: each slot holds the address of the argument
   value (the device pointer, then the tick count).  */
78 kargs
[0] = (void *) &d_a
;
79 kargs
[1] = (void *) &dticks
;
/* Create a CUDA stream for the kernel launch.  */
81 r
= cuStreamCreate (&stream
, CU_STREAM_DEFAULT
);
82 if (r
!= CUDA_SUCCESS
)
84 fprintf (stderr
, "cuStreamCreate failed: %d\n", r
);
/* Hand the stream to the OpenACC runtime under async value 0.
   NOTE(review): what happens when this call reports failure is not
   visible in this listing.  */
88 if (!acc_set_cuda_stream (0, stream
))
/* Launch "delay" with all six grid/block dimension arguments set to 1 and
   0 for the shared-memory size, on the stream created above.  */
91 r
= cuLaunchKernel (delay
, 1, 1, 1, 1, 1, 1, 0, stream
, kargs
, 0);
92 if (r
!= CUDA_SUCCESS
)
94 fprintf (stderr
, "cuLaunchKernel failed: %d\n", r
);
/* Right after the launch the test expects acc_async_test_all to return 0;
   any other value is reported as the operation not running.  */
98 if (acc_async_test_all () != 0)
100 fprintf (stderr
, "asynchronous operation not running\n");
/* Wait on the host for dtime / 1000 seconds (truncated to int) plus one
   extra second before polling again.  */
104 sleep ((int) (dtime
/ 1000.f
) + 1);
/* After the wait the test expects acc_async_test_all to return 1; any
   other value is reported as the operation still running.  */
106 if (acc_async_test_all () != 1)
108 fprintf (stderr
, "found asynchronous operation still running\n");
/* Shut down the OpenACC runtime for the NVIDIA device type.  */
117 acc_shutdown (acc_device_nvidia
);
122 /* { dg-output "" } */