1 /* Test dispatch of events to callbacks. */
11 /* Use explicit 'copyin' clauses, to work around "'firstprivate'
12 optimizations", which will cause the value at the point of call to be used
13 (*before* any potential modifications done in callbacks), as opposed to its
14 address being taken, which then later gets dereferenced (*after* any
15 modifications done in callbacks). */
16 #define COPYIN(...) copyin(__VA_ARGS__)
19 /* See the 'DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT' reference in 'libgomp.texi'.  */
21 #define DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT 0
/* Debug tracing hook: the '__builtin_printf' call is commented out, so uses
   of this macro expand to nothing.  */
24 #define DEBUG_printf(...) //__builtin_printf (__VA_ARGS__)
/* Progress marker shared between the test driver and the callback: starts at
   -1, is reset to 0 via 'STATE_OP' before each test section, and is set to 1
   by 'cb_enqueue_launch_start'.  */
27 volatile // TODO PR90488
28 static int state
= -1;
/* Apply OP to STATE, remembering the previous value in 'state_o' so that the
   transition can be traced via 'DEBUG_printf'.
   NOTE(review): only part of the macro body is visible here; the line that
   actually applies OP is presumed to sit between the two visible ones --
   confirm.  */
30 #define STATE_OP(state, op) \
33 typeof (state) state_o = (state); \
36 DEBUG_printf("state: %d -> %d\n", state_o, (state)); \
41 static acc_device_t acc_device_type
;
/* 'acc_device_type' (above) and 'acc_device_num' are filled in by the test
   driver from 'acc_get_device_type' and 'acc_get_device_num'; the callback
   compares the reported device against them.  */
42 static int acc_device_num
;
/* Parallelism dimensions that the callback compares against the values
   reported in the launch event.  */
43 static int num_gangs
, num_workers
, vector_length
;
/* Callback registered for 'acc_ev_enqueue_launch_start' events.  Sets 'state'
   to 1 to record that it ran, then asserts that the generic profiling
   information (PROF_INFO), the launch-event details (EVENT_INFO) and the
   device API information (API_INFO) have the values this test expects.  */
46 static void cb_enqueue_launch_start (acc_prof_info
*prof_info
, acc_event_info
*event_info
, acc_api_info
*api_info
)
48 DEBUG_printf ("%s\n", __FUNCTION__
);
/* This callback must never be invoked for the host device.  */
50 assert (acc_device_type
!= acc_device_host
);
/* Record that the callback ran.  */
53 STATE_OP (state
, = 1);
/* Generic profiling information.  */
55 assert (prof_info
->event_type
== acc_ev_enqueue_launch_start
);
56 assert (prof_info
->valid_bytes
== _ACC_PROF_INFO_VALID_BYTES
);
57 assert (prof_info
->version
== _ACC_PROF_INFO_VERSION
);
58 assert (prof_info
->device_type
== acc_device_type
);
59 assert (prof_info
->device_number
== acc_device_num
);
60 assert (prof_info
->thread_id
== -1);
61 assert (prof_info
->async
== acc_async_sync
);
62 assert (prof_info
->async_queue
== prof_info
->async
);
63 assert (prof_info
->src_file
== NULL
);
64 assert (prof_info
->func_name
== NULL
);
65 assert (prof_info
->line_no
== -1);
66 assert (prof_info
->end_line_no
== -1);
67 assert (prof_info
->func_line_no
== -1);
68 assert (prof_info
->func_end_line_no
== -1);
/* Launch-event specific information.  */
70 assert (event_info
->launch_event
.event_type
== prof_info
->event_type
);
71 assert (event_info
->launch_event
.valid_bytes
== _ACC_LAUNCH_EVENT_INFO_VALID_BYTES
);
72 assert (event_info
->launch_event
.parent_construct
== acc_construct_parallel
);
73 assert (event_info
->launch_event
.implicit
== 1);
74 assert (event_info
->launch_event
.tool_info
== NULL
);
75 assert (event_info
->launch_event
.kernel_name
!= NULL
);
/* Look for "main" in the kernel name, then for "omp_fn" after it.
   NOTE(review): no NULL check on 's' is visible between or after the two
   'strstr' calls in this view -- confirm one exists in the full source.  */
77 const char *s
= strstr (event_info
->launch_event
.kernel_name
, "main");
79 s
= strstr (s
, "omp_fn");
/* NOTE(review): the consecutive checks on 'num_gangs', 'num_workers' and
   'vector_length' below (">= 1", "== requested value", "== 1") cannot all
   hold at once in general; they look like alternatives selected by
   conditionals that are not visible in this view -- confirm.  */
83 assert (event_info
->launch_event
.num_gangs
>= 1);
87 assert (event_info
->launch_event
.num_gangs
== num_gangs
);
89 /* No parallelized OpenACC 'kernels' constructs. Unparallelized OpenACC
90 'kernels' constructs must get launched as 1 x 1 x 1 GPU kernels. */
91 assert (event_info
->launch_event
.num_gangs
== 1);
95 assert (event_info
->launch_event
.num_workers
>= 1);
99 assert (event_info
->launch_event
.num_workers
== num_workers
);
101 /* See 'num_gangs' above. */
102 assert (event_info
->launch_event
.num_workers
== 1);
/* With no positive 'vector_length' requested, any launched vector length of
   at least 1 is accepted; otherwise the expected value is fixed at 32 for
   'acc_device_nvidia' and at 64 for 'acc_device_radeon'.  */
105 if (vector_length
< 1)
106 assert (event_info
->launch_event
.vector_length
>= 1);
107 else if (acc_device_type
== acc_device_nvidia
) /* ... is special. */
108 assert (event_info
->launch_event
.vector_length
== 32);
109 else if (acc_device_type
== acc_device_radeon
) /* ...and so is this. */
110 assert (event_info
->launch_event
.vector_length
== 64);
114 assert (event_info
->launch_event
.vector_length
== vector_length
);
116 /* See 'num_gangs' above. */
117 assert (event_info
->launch_event
.vector_length
== 1);
/* Device API information: the expected 'device_api' depends on the device
   type.
   NOTE(review): the 'acc_device_api_cuda' check has no visible 'else' in
   this view; presumably it is the final branch of this chain -- confirm.  */
121 if (acc_device_type
== acc_device_host
)
122 assert (api_info
->device_api
== acc_device_api_none
);
123 else if (acc_device_type
== acc_device_radeon
)
124 assert (api_info
->device_api
== acc_device_api_other
);
126 assert (api_info
->device_api
== acc_device_api_cuda
);
127 assert (api_info
->valid_bytes
== _ACC_API_INFO_VALID_BYTES
);
128 assert (api_info
->device_type
== prof_info
->device_type
);
129 assert (api_info
->vendor
== -1);
130 assert (api_info
->device_handle
== NULL
);
131 assert (api_info
->context_handle
== NULL
);
132 assert (api_info
->async_handle
== NULL
);
/* File-scope handles for the profiling interface's register, unregister and
   lookup routines; 'reg' is later called to subscribe the launch callback.  */
136 static acc_prof_reg reg
;
137 static acc_prof_reg unreg
;
138 static acc_prof_lookup_func lookup
;
/* Library registration entry point; this test also calls it directly with
   'acc_prof_register', 'acc_prof_unregister' and 'acc_prof_lookup'.
   NOTE(review): presumably stores REG_, UNREG_ and LOOKUP_ into the
   file-scope 'reg', 'unreg' and 'lookup' above; the assignments are not
   visible in this view -- confirm.  */
139 void acc_register_library (acc_prof_reg reg_
, acc_prof_reg unreg_
, acc_prof_lookup_func lookup_
)
141 DEBUG_printf ("%s\n", __FUNCTION__
);
/* Invoke the registration entry point directly, passing the profiling
   interface's own register, unregister and lookup functions.  */
151 acc_register_library (acc_prof_register
, acc_prof_unregister
, acc_prof_lookup
);
/* Start from a known state, then subscribe the callback to
   'acc_ev_enqueue_launch_start' events.  */
153 STATE_OP (state
, = 0);
154 reg (acc_ev_enqueue_launch_start
, cb_enqueue_launch_start
, acc_reg
);
/* Remember the active device; the callback checks the reported device type
   and number against these.  */
157 acc_device_type
= acc_get_device_type ();
158 acc_device_num
= acc_get_device_num (acc_device_type
);
161 /* Parallelism dimensions: compiler/runtime decides. */
162 STATE_OP (state
, = 0);
163 num_gangs
= num_workers
= vector_length
= 0;
169 for (int i
= 0; i
< N
; ++i
)
172 if (acc_device_type
== acc_device_host
)
173 assert (state
== 0); /* No 'acc_ev_enqueue_launch_start'. */
176 for (int i
= 0; i
< N
; ++i
)
182 /* Parallelism dimensions: literal. */
183 STATE_OP (state
, = 0);
190 #pragma acc kernels \
191 num_gangs (30) num_workers (3) vector_length (5)
192 /* { dg-prune-output "using vector_length \\(32\\), ignoring 5" } */
194 for (int i
= 0; i
< N
; ++i
)
197 if (acc_device_type
== acc_device_host
)
198 assert (state
== 0); /* No 'acc_ev_enqueue_launch_start'. */
201 for (int i
= 0; i
< N
; ++i
)
207 /* Parallelism dimensions: variable. */
208 STATE_OP (state
, = 0);
215 #pragma acc kernels \
216 num_gangs (num_gangs) num_workers (num_workers) vector_length (vector_length)
217 /* { dg-prune-output "using vector_length \\(32\\), ignoring runtime setting" } */
219 for (int i
= 0; i
< N
; ++i
)
222 if (acc_device_type
== acc_device_host
)
223 assert (state
== 0); /* No 'acc_ev_enqueue_launch_start'. */
226 for (int i
= 0; i
< N
; ++i
)