Improve atomic store implementation on hppa-linux.
[official-gcc.git] / libgomp / testsuite / libgomp.oacc-c-c++-common / acc_prof-kernels-1.c
blobad33f72e2fb6b24658b54b01f83d60f5da02b6b5
1 /* Test dispatch of events to callbacks. */
3 #undef NDEBUG
4 #include <assert.h>
5 #include <stdlib.h>
6 #include <string.h>
8 #include <acc_prof.h>
/* Use explicit 'copyin' clauses, to work around "'firstprivate'
   optimizations", which will cause the value at the point of call to be used
   (*before* any potential modifications done in callbacks), as opposed to its
   address being taken, which then later gets dereferenced (*after* any
   modifications done in callbacks).  */
#define COPYIN(...) copyin(__VA_ARGS__)


/* See the 'DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT' reference in
   'libgomp.texi'.  */
#define DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT 0
/* Debug tracing hook.  The replacement text is commented out, so every use
   expands to nothing; remove the '//' to trace via '__builtin_printf'.  */
#define DEBUG_printf(...) //__builtin_printf (__VA_ARGS__)


/* Progress marker shared between 'main' and the profiling callback.
   Starts out as -1.  */
static volatile int state = -1; /* 'volatile': TODO PR90488.  */

/* Apply OP (an operator together with its right-hand operand, for example
   '= 1') to the lvalue STATE, and report the old and the new value through
   'DEBUG_printf'.  '__typeof__' rather than 'typeof' so that this is also
   accepted in strict ISO language modes.  The '(void)' cast keeps 'state_o'
   from being flagged as unused while 'DEBUG_printf' expands to nothing.  */
#define STATE_OP(state, op) \
  do \
    { \
      __typeof__ (state) state_o = (state); \
      (void) state_o; \
      (state)op; \
      DEBUG_printf("state: %d -> %d\n", state_o, (state)); \
    } \
  while (0)
41 static acc_device_t acc_device_type;
42 static int acc_device_num;
43 static int num_gangs, num_workers, vector_length;
46 static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
48 DEBUG_printf ("%s\n", __FUNCTION__);
50 assert (acc_device_type != acc_device_host);
52 assert (state == 0);
53 STATE_OP (state, = 1);
55 assert (prof_info->event_type == acc_ev_enqueue_launch_start);
56 assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
57 assert (prof_info->version == _ACC_PROF_INFO_VERSION);
58 assert (prof_info->device_type == acc_device_type);
59 assert (prof_info->device_number == acc_device_num);
60 assert (prof_info->thread_id == -1);
61 assert (prof_info->async == acc_async_sync);
62 assert (prof_info->async_queue == prof_info->async);
63 assert (prof_info->src_file == NULL);
64 assert (prof_info->func_name == NULL);
65 assert (prof_info->line_no == -1);
66 assert (prof_info->end_line_no == -1);
67 assert (prof_info->func_line_no == -1);
68 assert (prof_info->func_end_line_no == -1);
70 assert (event_info->launch_event.event_type == prof_info->event_type);
71 assert (event_info->launch_event.valid_bytes == _ACC_LAUNCH_EVENT_INFO_VALID_BYTES);
72 assert (event_info->launch_event.parent_construct == acc_construct_parallel);
73 assert (event_info->launch_event.implicit == 1);
74 assert (event_info->launch_event.tool_info == NULL);
75 assert (event_info->launch_event.kernel_name != NULL);
77 const char *s = strstr (event_info->launch_event.kernel_name, "main");
78 assert (s != NULL);
79 s = strstr (s, "omp_fn");
80 assert (s != NULL);
82 if (num_gangs < 1)
83 assert (event_info->launch_event.num_gangs >= 1);
84 else
86 #ifdef __OPTIMIZE__
87 assert (event_info->launch_event.num_gangs == num_gangs);
88 #else
89 /* No parallelized OpenACC 'kernels' constructs. Unparallelized OpenACC
90 'kernels' constructs must get launched as 1 x 1 x 1 GPU kernels. */
91 assert (event_info->launch_event.num_gangs == 1);
92 #endif
94 if (num_workers < 1)
95 assert (event_info->launch_event.num_workers >= 1);
96 else
98 #ifdef __OPTIMIZE__
99 assert (event_info->launch_event.num_workers == num_workers);
100 #else
101 /* See 'num_gangs' above. */
102 assert (event_info->launch_event.num_workers == 1);
103 #endif
105 if (vector_length < 1)
106 assert (event_info->launch_event.vector_length >= 1);
107 else if (acc_device_type == acc_device_nvidia) /* ... is special. */
108 assert (event_info->launch_event.vector_length == 32);
109 else if (acc_device_type == acc_device_radeon) /* ...and so is this. */
110 assert (event_info->launch_event.vector_length == 64);
111 else
113 #ifdef __OPTIMIZE__
114 assert (event_info->launch_event.vector_length == vector_length);
115 #else
116 /* See 'num_gangs' above. */
117 assert (event_info->launch_event.vector_length == 1);
118 #endif
121 if (acc_device_type == acc_device_host)
122 assert (api_info->device_api == acc_device_api_none);
123 else if (acc_device_type == acc_device_radeon)
124 assert (api_info->device_api == acc_device_api_other);
125 else
126 assert (api_info->device_api == acc_device_api_cuda);
127 assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES);
128 assert (api_info->device_type == prof_info->device_type);
129 assert (api_info->vendor == -1);
130 assert (api_info->device_handle == NULL);
131 assert (api_info->context_handle == NULL);
132 assert (api_info->async_handle == NULL);
136 static acc_prof_reg reg;
137 static acc_prof_reg unreg;
138 static acc_prof_lookup_func lookup;
139 void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_lookup_func lookup_)
141 DEBUG_printf ("%s\n", __FUNCTION__);
143 reg = reg_;
144 unreg = unreg_;
145 lookup = lookup_;
149 int main()
151 acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
153 STATE_OP (state, = 0);
154 reg (acc_ev_enqueue_launch_start, cb_enqueue_launch_start, acc_reg);
155 assert (state == 0);
157 acc_device_type = acc_get_device_type ();
158 acc_device_num = acc_get_device_num (acc_device_type);
159 assert (state == 0);
161 /* Parallelism dimensions: compiler/runtime decides. */
162 STATE_OP (state, = 0);
163 num_gangs = num_workers = vector_length = 0;
165 #define N 100
166 int x[N];
167 #pragma acc kernels
169 for (int i = 0; i < N; ++i)
170 x[i] = i * i;
172 if (acc_device_type == acc_device_host)
173 assert (state == 0); /* No 'acc_ev_enqueue_launch_start'. */
174 else
175 assert (state == 1);
176 for (int i = 0; i < N; ++i)
177 if (x[i] != i * i)
178 __builtin_abort ();
179 #undef N
182 /* Parallelism dimensions: literal. */
183 STATE_OP (state, = 0);
184 num_gangs = 30;
185 num_workers = 3;
186 vector_length = 5;
188 #define N 100
189 int x[N];
190 #pragma acc kernels \
191 num_gangs (30) num_workers (3) vector_length (5)
192 /* { dg-prune-output "using vector_length \\(32\\), ignoring 5" } */
194 for (int i = 0; i < N; ++i)
195 x[i] = i * i;
197 if (acc_device_type == acc_device_host)
198 assert (state == 0); /* No 'acc_ev_enqueue_launch_start'. */
199 else
200 assert (state == 1);
201 for (int i = 0; i < N; ++i)
202 if (x[i] != i * i)
203 __builtin_abort ();
204 #undef N
207 /* Parallelism dimensions: variable. */
208 STATE_OP (state, = 0);
209 num_gangs = 22;
210 num_workers = 5;
211 vector_length = 7;
213 #define N 100
214 int x[N];
215 #pragma acc kernels \
216 num_gangs (num_gangs) num_workers (num_workers) vector_length (vector_length)
217 /* { dg-prune-output "using vector_length \\(32\\), ignoring runtime setting" } */
219 for (int i = 0; i < N; ++i)
220 x[i] = i * i;
222 if (acc_device_type == acc_device_host)
223 assert (state == 0); /* No 'acc_ev_enqueue_launch_start'. */
224 else
225 assert (state == 1);
226 for (int i = 0; i < N; ++i)
227 if (x[i] != i * i)
228 __builtin_abort ();
229 #undef N
232 return 0;