/* Set num_threads to 50 on 32-bit hppa in two libgomp loop tests
   [official-gcc.git] / libgomp / testsuite / libgomp.oacc-c-c++-common / acc_prof-kernels-1.c
   blob 2c8539714740ed7d1bba0504d23cc96b74730e67  */
/* Test dispatch of events to callbacks. */
/* { dg-additional-options "--param=openacc-kernels=decompose" } */
/* { dg-additional-options "-fopt-info-omp-all" }
   { dg-additional-options "-foffload=-fopt-info-omp-all" } */
/* { dg-additional-options "--param=openacc-privatization=noisy" }
   { dg-additional-options "-foffload=--param=openacc-privatization=noisy" }
   Prune a few: uninteresting:
   { dg-prune-output {note: variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} } */
/* It's only with Tcl 8.5 (released in 2007) that "the variable 'varName'
   passed to 'incr' may be unset, and in that case, it will be set to [...]",
   so to maintain compatibility with earlier Tcl releases, we manually
   initialize counter variables:
   { dg-line l_dummy[variable c_compute 0] }
   { dg-message dummy {} { target iN-VAl-Id } l_dummy } to avoid
   "WARNING: dg-line var l_dummy defined, but not used". */
21 #undef NDEBUG
22 #include <assert.h>
23 #include <stdlib.h>
24 #include <string.h>
26 #include <acc_prof.h>
/* Use explicit 'copyin' clauses, to work around "'firstprivate'
   optimizations", which will cause the value at the point of call to be used
   (*before* any potential modifications done in callbacks), as opposed to its
   address being taken, which then later gets dereferenced (*after* any
   modifications done in callbacks).  */
#define COPYIN(...) copyin(__VA_ARGS__)


/* See the 'DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT' reference in
   'libgomp.texi'.  */
#define DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT 0
/* Debug tracing hook.  As written, the replacement list consists only of a
   '//' comment, so every use expands to nothing; remove the '//' to trace
   through '__builtin_printf'.  */
#define DEBUG_printf(...) //__builtin_printf (__VA_ARGS__)


/* Progress marker for the test; starts out as -1 and is only modified through
   'STATE_OP'.  */
static volatile /* TODO PR90488 */ int state = -1;

/* Apply OP (for example '= 1') to STATE, and trace the old and new values via
   'DEBUG_printf'.  '__typeof__' is used rather than 'typeof' so that the macro
   also compiles in strict ISO modes; the '(void)' cast keeps the saved old
   value from being flagged as unused while tracing is disabled.  */
#define STATE_OP(state, op) \
  do \
    { \
      __typeof__ (state) state_o = (state); \
      (void) state_o; \
      (state)op; \
      DEBUG_printf("state: %d -> %d\n", state_o, (state)); \
    } \
  while (0)
59 static acc_device_t acc_device_type;
60 static int acc_device_num;
61 static int num_gangs, num_workers, vector_length;
64 static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
66 DEBUG_printf ("%s\n", __FUNCTION__);
68 assert (acc_device_type != acc_device_host);
70 assert (state == 0);
71 STATE_OP (state, = 1);
73 assert (prof_info->event_type == acc_ev_enqueue_launch_start);
74 assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
75 assert (prof_info->version == _ACC_PROF_INFO_VERSION);
76 assert (prof_info->device_type == acc_device_type);
77 assert (prof_info->device_number == acc_device_num);
78 assert (prof_info->thread_id == -1);
79 assert (prof_info->async == acc_async_noval);
80 assert (prof_info->async_queue == prof_info->async);
81 assert (prof_info->src_file == NULL);
82 assert (prof_info->func_name == NULL);
83 assert (prof_info->line_no == -1);
84 assert (prof_info->end_line_no == -1);
85 assert (prof_info->func_line_no == -1);
86 assert (prof_info->func_end_line_no == -1);
88 assert (event_info->launch_event.event_type == prof_info->event_type);
89 assert (event_info->launch_event.valid_bytes == _ACC_LAUNCH_EVENT_INFO_VALID_BYTES);
90 assert (event_info->launch_event.parent_construct == acc_construct_parallel);
91 assert (event_info->launch_event.implicit == 1);
92 assert (event_info->launch_event.tool_info == NULL);
93 assert (event_info->launch_event.kernel_name != NULL);
95 const char *s = strstr (event_info->launch_event.kernel_name, "main");
96 assert (s != NULL);
97 s = strstr (s, "omp_fn");
98 assert (s != NULL);
100 if (num_gangs < 1)
101 assert (event_info->launch_event.num_gangs >= 1);
102 else
104 #ifdef __OPTIMIZE__
105 assert (event_info->launch_event.num_gangs == num_gangs);
106 #else
107 /* No parallelized OpenACC 'kernels' constructs. Unparallelized OpenACC
108 'kernels' constructs must get launched as 1 x 1 x 1 GPU kernels. */
109 assert (event_info->launch_event.num_gangs == 1);
110 #endif
112 if (num_workers < 1)
113 assert (event_info->launch_event.num_workers >= 1);
114 else
116 #ifdef __OPTIMIZE__
117 assert (event_info->launch_event.num_workers == num_workers);
118 #else
119 /* See 'num_gangs' above. */
120 assert (event_info->launch_event.num_workers == 1);
121 #endif
123 if (vector_length < 1)
124 assert (event_info->launch_event.vector_length >= 1);
125 else if (acc_device_type == acc_device_nvidia) /* ... is special. */
126 assert (event_info->launch_event.vector_length == 32);
127 else if (acc_device_type == acc_device_radeon) /* ...and so is this. */
128 assert (event_info->launch_event.vector_length == 64);
129 else
131 #ifdef __OPTIMIZE__
132 assert (event_info->launch_event.vector_length == vector_length);
133 #else
134 /* See 'num_gangs' above. */
135 assert (event_info->launch_event.vector_length == 1);
136 #endif
139 if (acc_device_type == acc_device_host)
140 assert (api_info->device_api == acc_device_api_none);
141 else if (acc_device_type == acc_device_radeon)
142 assert (api_info->device_api == acc_device_api_other);
143 else
144 assert (api_info->device_api == acc_device_api_cuda);
145 assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES);
146 assert (api_info->device_type == prof_info->device_type);
147 assert (api_info->vendor == -1);
148 assert (api_info->device_handle == NULL);
149 assert (api_info->context_handle == NULL);
150 assert (api_info->async_handle == NULL);
154 static acc_prof_reg reg;
155 static acc_prof_reg unreg;
156 static acc_prof_lookup_func lookup;
157 void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_lookup_func lookup_)
159 DEBUG_printf ("%s\n", __FUNCTION__);
161 reg = reg_;
162 unreg = unreg_;
163 lookup = lookup_;
167 int main()
169 acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
171 STATE_OP (state, = 0);
172 reg (acc_ev_enqueue_launch_start, cb_enqueue_launch_start, acc_reg);
173 assert (state == 0);
175 acc_device_type = acc_get_device_type ();
176 acc_device_num = acc_get_device_num (acc_device_type);
177 assert (state == 0);
179 /* Parallelism dimensions: compiler/runtime decides. */
180 STATE_OP (state, = 0);
181 num_gangs = num_workers = vector_length = 0;
183 #define N 100
184 int x[N];
185 #pragma acc kernels /* { dg-line l_compute[incr c_compute] } */
186 /* { dg-note {OpenACC 'kernels' decomposition: variable 'i' declared in block requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
187 { dg-note {variable 'i' made addressable} {} { target *-*-* } l_compute$c_compute } */
188 /* { dg-note {variable 'i' declared in block is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l_compute$c_compute } */
189 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { ! __OPTIMIZE__ } } l_compute$c_compute }
190 { dg-optimized {assigned OpenACC gang loop parallelism} {} { target __OPTIMIZE__ } l_compute$c_compute } */
192 /* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
193 for (int i = 0; i < N; ++i)
194 x[i] = i * i;
196 if (acc_device_type == acc_device_host)
197 assert (state == 0); /* No 'acc_ev_enqueue_launch_start'. */
198 else
199 assert (state == 1);
200 for (int i = 0; i < N; ++i)
201 if (x[i] != i * i)
202 __builtin_abort ();
203 #undef N
206 /* Parallelism dimensions: literal. */
207 STATE_OP (state, = 0);
208 num_gangs = 30;
209 num_workers = 3;
210 vector_length = 5;
212 #define N 100
213 int x[N];
214 #pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ \
215 num_gangs (30) num_workers (3) vector_length (5)
216 /* { dg-note {OpenACC 'kernels' decomposition: variable 'i' declared in block requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
217 { dg-note {variable 'i' made addressable} {} { target *-*-* } l_compute$c_compute } */
218 /* { dg-note {variable 'i' declared in block is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l_compute$c_compute } */
219 /* { dg-warning {using 'vector_length \(32\)', ignoring 5} {} { target { __OPTIMIZE__ && openacc_nvidia_accel_selected } } l_compute$c_compute } */
220 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { ! __OPTIMIZE__ } } l_compute$c_compute }
221 { dg-optimized {assigned OpenACC gang loop parallelism} {} { target __OPTIMIZE__ } l_compute$c_compute } */
223 /* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
224 for (int i = 0; i < N; ++i)
225 x[i] = i * i;
227 if (acc_device_type == acc_device_host)
228 assert (state == 0); /* No 'acc_ev_enqueue_launch_start'. */
229 else
230 assert (state == 1);
231 for (int i = 0; i < N; ++i)
232 if (x[i] != i * i)
233 __builtin_abort ();
234 #undef N
237 /* Parallelism dimensions: variable. */
238 STATE_OP (state, = 0);
239 num_gangs = 22;
240 num_workers = 5;
241 vector_length = 7;
243 #define N 100
244 int x[N];
245 #pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ \
246 num_gangs (num_gangs) num_workers (num_workers) vector_length (vector_length)
247 /* { dg-note {OpenACC 'kernels' decomposition: variable 'i' declared in block requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
248 { dg-note {variable 'i' made addressable} {} { target *-*-* } l_compute$c_compute } */
249 /* { dg-note {variable 'i' declared in block is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l_compute$c_compute } */
250 /* { dg-warning {using 'vector_length \(32\)', ignoring runtime setting} {} { target { __OPTIMIZE__ && openacc_nvidia_accel_selected } } l_compute$c_compute } */
251 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { ! __OPTIMIZE__ } } l_compute$c_compute }
252 { dg-optimized {assigned OpenACC gang loop parallelism} {} { target __OPTIMIZE__ } l_compute$c_compute } */
254 /* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
255 for (int i = 0; i < N; ++i)
256 x[i] = i * i;
258 if (acc_device_type == acc_device_host)
259 assert (state == 0); /* No 'acc_ev_enqueue_launch_start'. */
260 else
261 assert (state == 1);
262 for (int i = 0; i < N; ++i)
263 if (x[i] != i * i)
264 __builtin_abort ();
265 #undef N
268 return 0;