Set num_threads to 50 on 32-bit hppa in two libgomp loop tests
[official-gcc.git] / libgomp / testsuite / libgomp.oacc-c-c++-common / lib-76.c
blobf9045266f1c4c363e03372e950ce6fa660ae3b87
1 /* { dg-do run { target openacc_nvidia_accel_selected } } */
2 /* { dg-additional-options "-lcuda" } */
3 /* { dg-require-effective-target openacc_cuda } */
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <unistd.h>
8 #include <openacc.h>
9 #include <cuda.h>
10 #include "timer.h"
12 int
13 main (int argc, char **argv)
15 CUdevice dev;
16 CUfunction delay;
17 CUmodule module;
18 CUresult r;
19 int N;
20 int i;
21 CUstream *streams;
22 unsigned long *a, *d_a, dticks;
23 int nbytes;
24 float atime, dtime, hitime, lotime;
25 void *kargs[2];
26 int clkrate;
27 int devnum, nprocs;
29 acc_init (acc_device_nvidia);
31 devnum = acc_get_device_num (acc_device_nvidia);
33 r = cuDeviceGet (&dev, devnum);
34 if (r != CUDA_SUCCESS)
36 fprintf (stderr, "cuDeviceGet failed: %d\n", r);
37 abort ();
40 r =
41 cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
42 dev);
43 if (r != CUDA_SUCCESS)
45 fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
46 abort ();
49 r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
50 if (r != CUDA_SUCCESS)
52 fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
53 abort ();
56 r = cuModuleLoad (&module, "subr.ptx");
57 if (r != CUDA_SUCCESS)
59 fprintf (stderr, "cuModuleLoad failed: %d\n", r);
60 abort ();
63 r = cuModuleGetFunction (&delay, module, "delay");
64 if (r != CUDA_SUCCESS)
66 fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
67 abort ();
70 nbytes = nprocs * sizeof (unsigned long);
72 dtime = 200.0;
74 dticks = (unsigned long) (dtime * clkrate);
76 N = nprocs;
78 a = (unsigned long *) malloc (nbytes);
79 d_a = (unsigned long *) acc_malloc (nbytes);
81 acc_map_data (a, d_a, nbytes);
83 streams = (CUstream *) malloc (N * sizeof (void *));
85 for (i = 0; i < N; i++)
87 streams[i] = (CUstream) acc_get_cuda_stream (i);
88 if (streams[i] != NULL)
89 abort ();
91 r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
92 if (r != CUDA_SUCCESS)
94 fprintf (stderr, "cuStreamCreate failed: %d\n", r);
95 abort ();
98 if (!acc_set_cuda_stream (i, streams[i]))
99 abort ();
102 init_timers (1);
104 kargs[0] = (void *) &d_a;
105 kargs[1] = (void *) &dticks;
107 start_timer (0);
109 for (i = 0; i < N; i++)
111 r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
112 if (r != CUDA_SUCCESS)
114 fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
115 abort ();
118 acc_wait (i);
121 atime = stop_timer (0);
123 hitime = dtime * N;
124 hitime += hitime * 0.02;
126 lotime = dtime * N;
127 lotime -= lotime * 0.02;
129 if (atime > hitime || atime < lotime)
131 fprintf (stderr, "actual time < delay time\n");
132 abort ();
135 acc_unmap_data (a);
137 fini_timers ();
139 free (streams);
140 free (a);
141 acc_free (d_a);
143 acc_shutdown (acc_device_nvidia);
145 exit (0);
148 /* { dg-output "" } */