1 /* Test dispatch of events to callbacks. */
3 /* { dg-additional-options "--param=openacc-kernels=decompose" } */
5 /* { dg-additional-options "-fopt-info-omp-all" }
6 { dg-additional-options "-foffload=-fopt-info-omp-all" } */
8 /* { dg-additional-options "--param=openacc-privatization=noisy" }
9 { dg-additional-options "-foffload=--param=openacc-privatization=noisy" }
10 Prune a few: uninteresting:
11 { dg-prune-output {note: variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} } */
13 /* It's only with Tcl 8.5 (released in 2007) that "the variable 'varName'
14 passed to 'incr' may be unset, and in that case, it will be set to [...]",
15 so to maintain compatibility with earlier Tcl releases, we manually
16 initialize counter variables:
17 { dg-line l_dummy[variable c_compute 0] }
18 { dg-message dummy {} { target iN-VAl-Id } l_dummy } to avoid
19 "WARNING: dg-line var l_dummy defined, but not used". */
29 /* Use explicit 'copyin' clauses, to work around "'firstprivate'
30 optimizations", which will cause the value at the point of call to be used
31 (*before* any potential modifications done in callbacks), as opposed to its
32 address being taken, which then later gets dereferenced (*after* any
33 modifications done in callbacks). */
34 #define COPYIN(...) copyin(__VA_ARGS__)
37 /* See the 'DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT' reference in
39 #define DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT 0
42 #define DEBUG_printf(...) //__builtin_printf (__VA_ARGS__)
45 volatile // TODO PR90488
46 static int state
= -1;
48 #define STATE_OP(state, op) \
51 typeof (state) state_o = (state); \
54 DEBUG_printf("state: %d -> %d\n", state_o, (state)); \
59 static acc_device_t acc_device_type
;
60 static int acc_device_num
;
61 static int num_gangs
, num_workers
, vector_length
;
64 static void cb_enqueue_launch_start (acc_prof_info
*prof_info
, acc_event_info
*event_info
, acc_api_info
*api_info
)
66 DEBUG_printf ("%s\n", __FUNCTION__
);
68 assert (acc_device_type
!= acc_device_host
);
71 STATE_OP (state
, = 1);
73 assert (prof_info
->event_type
== acc_ev_enqueue_launch_start
);
74 assert (prof_info
->valid_bytes
== _ACC_PROF_INFO_VALID_BYTES
);
75 assert (prof_info
->version
== _ACC_PROF_INFO_VERSION
);
76 assert (prof_info
->device_type
== acc_device_type
);
77 assert (prof_info
->device_number
== acc_device_num
);
78 assert (prof_info
->thread_id
== -1);
79 assert (prof_info
->async
== acc_async_noval
);
80 assert (prof_info
->async_queue
== prof_info
->async
);
81 assert (prof_info
->src_file
== NULL
);
82 assert (prof_info
->func_name
== NULL
);
83 assert (prof_info
->line_no
== -1);
84 assert (prof_info
->end_line_no
== -1);
85 assert (prof_info
->func_line_no
== -1);
86 assert (prof_info
->func_end_line_no
== -1);
88 assert (event_info
->launch_event
.event_type
== prof_info
->event_type
);
89 assert (event_info
->launch_event
.valid_bytes
== _ACC_LAUNCH_EVENT_INFO_VALID_BYTES
);
90 assert (event_info
->launch_event
.parent_construct
== acc_construct_parallel
);
91 assert (event_info
->launch_event
.implicit
== 1);
92 assert (event_info
->launch_event
.tool_info
== NULL
);
93 assert (event_info
->launch_event
.kernel_name
!= NULL
);
95 const char *s
= strstr (event_info
->launch_event
.kernel_name
, "main");
97 s
= strstr (s
, "omp_fn");
101 assert (event_info
->launch_event
.num_gangs
>= 1);
105 assert (event_info
->launch_event
.num_gangs
== num_gangs
);
107 /* No parallelized OpenACC 'kernels' constructs. Unparallelized OpenACC
108 'kernels' constructs must get launched as 1 x 1 x 1 GPU kernels. */
109 assert (event_info
->launch_event
.num_gangs
== 1);
113 assert (event_info
->launch_event
.num_workers
>= 1);
117 assert (event_info
->launch_event
.num_workers
== num_workers
);
119 /* See 'num_gangs' above. */
120 assert (event_info
->launch_event
.num_workers
== 1);
123 if (vector_length
< 1)
124 assert (event_info
->launch_event
.vector_length
>= 1);
125 else if (acc_device_type
== acc_device_nvidia
) /* ... is special. */
126 assert (event_info
->launch_event
.vector_length
== 32);
127 else if (acc_device_type
== acc_device_radeon
) /* ...and so is this. */
128 assert (event_info
->launch_event
.vector_length
== 64);
132 assert (event_info
->launch_event
.vector_length
== vector_length
);
134 /* See 'num_gangs' above. */
135 assert (event_info
->launch_event
.vector_length
== 1);
139 if (acc_device_type
== acc_device_host
)
140 assert (api_info
->device_api
== acc_device_api_none
);
141 else if (acc_device_type
== acc_device_radeon
)
142 assert (api_info
->device_api
== acc_device_api_other
);
144 assert (api_info
->device_api
== acc_device_api_cuda
);
145 assert (api_info
->valid_bytes
== _ACC_API_INFO_VALID_BYTES
);
146 assert (api_info
->device_type
== prof_info
->device_type
);
147 assert (api_info
->vendor
== -1);
148 assert (api_info
->device_handle
== NULL
);
149 assert (api_info
->context_handle
== NULL
);
150 assert (api_info
->async_handle
== NULL
);
154 static acc_prof_reg reg
;
155 static acc_prof_reg unreg
;
156 static acc_prof_lookup_func lookup
;
157 void acc_register_library (acc_prof_reg reg_
, acc_prof_reg unreg_
, acc_prof_lookup_func lookup_
)
159 DEBUG_printf ("%s\n", __FUNCTION__
);
169 acc_register_library (acc_prof_register
, acc_prof_unregister
, acc_prof_lookup
);
171 STATE_OP (state
, = 0);
172 reg (acc_ev_enqueue_launch_start
, cb_enqueue_launch_start
, acc_reg
);
175 acc_device_type
= acc_get_device_type ();
176 acc_device_num
= acc_get_device_num (acc_device_type
);
179 /* Parallelism dimensions: compiler/runtime decides. */
180 STATE_OP (state
, = 0);
181 num_gangs
= num_workers
= vector_length
= 0;
185 #pragma acc kernels /* { dg-line l_compute[incr c_compute] } */
186 /* { dg-note {OpenACC 'kernels' decomposition: variable 'i' declared in block requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
187 { dg-note {variable 'i' made addressable} {} { target *-*-* } l_compute$c_compute } */
188 /* { dg-note {variable 'i' declared in block is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l_compute$c_compute } */
189 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { ! __OPTIMIZE__ } } l_compute$c_compute }
190 { dg-optimized {assigned OpenACC gang loop parallelism} {} { target __OPTIMIZE__ } l_compute$c_compute } */
192 /* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
193 for (int i
= 0; i
< N
; ++i
)
196 if (acc_device_type
== acc_device_host
)
197 assert (state
== 0); /* No 'acc_ev_enqueue_launch_start'. */
200 for (int i
= 0; i
< N
; ++i
)
206 /* Parallelism dimensions: literal. */
207 STATE_OP (state
, = 0);
214 #pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ \
215 num_gangs (30) num_workers (3) vector_length (5)
216 /* { dg-note {OpenACC 'kernels' decomposition: variable 'i' declared in block requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
217 { dg-note {variable 'i' made addressable} {} { target *-*-* } l_compute$c_compute } */
218 /* { dg-note {variable 'i' declared in block is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l_compute$c_compute } */
219 /* { dg-warning {using 'vector_length \(32\)', ignoring 5} {} { target { __OPTIMIZE__ && openacc_nvidia_accel_selected } } l_compute$c_compute } */
220 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { ! __OPTIMIZE__ } } l_compute$c_compute }
221 { dg-optimized {assigned OpenACC gang loop parallelism} {} { target __OPTIMIZE__ } l_compute$c_compute } */
223 /* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
224 for (int i
= 0; i
< N
; ++i
)
227 if (acc_device_type
== acc_device_host
)
228 assert (state
== 0); /* No 'acc_ev_enqueue_launch_start'. */
231 for (int i
= 0; i
< N
; ++i
)
237 /* Parallelism dimensions: variable. */
238 STATE_OP (state
, = 0);
245 #pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ \
246 num_gangs (num_gangs) num_workers (num_workers) vector_length (vector_length)
247 /* { dg-note {OpenACC 'kernels' decomposition: variable 'i' declared in block requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
248 { dg-note {variable 'i' made addressable} {} { target *-*-* } l_compute$c_compute } */
249 /* { dg-note {variable 'i' declared in block is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l_compute$c_compute } */
250 /* { dg-warning {using 'vector_length \(32\)', ignoring runtime setting} {} { target { __OPTIMIZE__ && openacc_nvidia_accel_selected } } l_compute$c_compute } */
251 /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { ! __OPTIMIZE__ } } l_compute$c_compute }
252 { dg-optimized {assigned OpenACC gang loop parallelism} {} { target __OPTIMIZE__ } l_compute$c_compute } */
254 /* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
255 for (int i
= 0; i
< N
; ++i
)
258 if (acc_device_type
== acc_device_host
)
259 assert (state
== 0); /* No 'acc_ev_enqueue_launch_start'. */
262 for (int i
= 0; i
< N
; ++i
)