1 /* { dg-do run { target openacc_nvidia_accel_selected } } */
2 /* { dg-additional-options "-lcuda" } */
11 main (int argc
, char **argv
)
20 unsigned long **a
, **d_a
, *tid
, ticks
;
26 acc_init (acc_device_nvidia
);
28 devnum
= acc_get_device_num (acc_device_nvidia
);
30 r
= cuDeviceGet (&dev
, devnum
);
31 if (r
!= CUDA_SUCCESS
)
33 fprintf (stderr
, "cuDeviceGet failed: %d\n", r
);
38 cuDeviceGetAttribute (&nprocs
, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT
,
40 if (r
!= CUDA_SUCCESS
)
42 fprintf (stderr
, "cuDeviceGetAttribute failed: %d\n", r
);
46 r
= cuDeviceGetAttribute (&clkrate
, CU_DEVICE_ATTRIBUTE_CLOCK_RATE
, dev
);
47 if (r
!= CUDA_SUCCESS
)
49 fprintf (stderr
, "cuDeviceGetAttribute failed: %d\n", r
);
53 r
= cuModuleLoad (&module
, "subr.ptx");
54 if (r
!= CUDA_SUCCESS
)
56 fprintf (stderr
, "cuModuleLoad failed: %d\n", r
);
60 r
= cuModuleGetFunction (&delay2
, module
, "delay2");
61 if (r
!= CUDA_SUCCESS
)
63 fprintf (stderr
, "cuModuleGetFunction failed: %d\n", r
);
67 nbytes
= sizeof (int);
69 ticks
= (unsigned long) (200.0 * clkrate
);
73 streams
= (CUstream
*) malloc (N
* sizeof (void *));
75 a
= (unsigned long **) malloc (N
* sizeof (unsigned long *));
76 d_a
= (unsigned long **) malloc (N
* sizeof (unsigned long *));
77 tid
= (unsigned long *) malloc (N
* sizeof (unsigned long));
79 for (i
= 0; i
< N
; i
++)
81 a
[i
] = (unsigned long *) malloc (sizeof (unsigned long));
83 d_a
[i
] = (unsigned long *) acc_malloc (nbytes
);
86 acc_map_data (a
[i
], d_a
[i
], nbytes
);
88 streams
[i
] = (CUstream
) acc_get_cuda_stream (i
);
89 if (streams
[i
] != NULL
)
92 r
= cuStreamCreate (&streams
[i
], CU_STREAM_DEFAULT
);
93 if (r
!= CUDA_SUCCESS
)
95 fprintf (stderr
, "cuStreamCreate failed: %d\n", r
);
99 if (!acc_set_cuda_stream (i
, streams
[i
]))
103 for (i
= 0; i
< N
; i
++)
105 kargs
[0] = (void *) &d_a
[i
];
106 kargs
[1] = (void *) &ticks
;
107 kargs
[2] = (void *) &tid
[i
];
109 r
= cuLaunchKernel (delay2
, 1, 1, 1, 1, 1, 1, 0, streams
[i
], kargs
, 0);
110 if (r
!= CUDA_SUCCESS
)
112 fprintf (stderr
, "cuLaunchKernel failed: %d\n", r
);
116 ticks
= (unsigned long) (50.0 * clkrate
);
119 acc_wait_all_async (0);
121 for (i
= 0; i
< N
; i
++)
123 acc_copyout (a
[i
], nbytes
);
130 for (i
= 0; i
< N
; i
++)
139 acc_shutdown (acc_device_nvidia
);
144 /* { dg-output "" } */