1 /* { dg-additional-options "-fopenacc-dim=32" } */
5 #include <gomp-constants.h>
7 static int check (const int *ary
, int size
, int gp
, int wp
, int vp
)
11 int gangs
[32], workers
[32], vectors
[32];
13 for (ix
= 0; ix
< 32; ix
++)
14 gangs
[ix
] = workers
[ix
] = vectors
[ix
] = 0;
16 for (ix
= 0; ix
< size
; ix
++)
18 vectors
[ary
[ix
] & 0xff]++;
19 workers
[(ary
[ix
] >> 8) & 0xff]++;
20 gangs
[(ary
[ix
] >> 16) & 0xff]++;
23 for (ix
= 0; ix
< 32; ix
++)
27 int expect
= gangs
[0];
28 if (gangs
[ix
] != expect
)
31 printf ("gang %d not used %d times\n", ix
, expect
);
34 else if (ix
&& gangs
[ix
])
37 printf ("gang %d unexpectedly used\n", ix
);
42 int expect
= workers
[0];
43 if (workers
[ix
] != expect
)
46 printf ("worker %d not used %d times\n", ix
, expect
);
49 else if (ix
&& workers
[ix
])
52 printf ("worker %d unexpectedly used\n", ix
);
57 int expect
= vectors
[0];
58 if (vectors
[ix
] != expect
)
61 printf ("vector %d not used %d times\n", ix
, expect
);
64 else if (ix
&& vectors
[ix
])
67 printf ("vector %d unexpectedly used\n", ix
);
74 #pragma acc routine seq
75 static int __attribute__((noinline
)) place ()
79 if (acc_on_device (acc_device_not_host
))
83 g
= __builtin_goacc_parlevel_id (GOMP_DIM_GANG
);
84 w
= __builtin_goacc_parlevel_id (GOMP_DIM_WORKER
);
85 v
= __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR
);
86 r
= (g
<< 16) | (w
<< 8) | v
;
91 static void clear (int *ary
, int size
)
95 for (ix
= 0; ix
< size
; ix
++)
99 int gang_vector_1 (int *ary
, int size
)
102 #pragma acc parallel vector_length(32) num_gangs (32) copy (ary[0:size]) firstprivate (size)
104 #pragma acc loop tile(128) gang vector
105 for (int jx
= 0; jx
< size
; jx
++)
109 return check (ary
, size
, 1, 0, 1);
112 int gang_vector_2a (int *ary
, int size
)
118 #pragma acc parallel vector_length(32) num_gangs (32) copy (ary[0:size]) firstprivate (size)
120 #pragma acc loop tile(64, 64) gang vector
121 for (int jx
= 0; jx
< size
/ 256; jx
++)
122 for (int ix
= 0; ix
< 256; ix
++)
123 ary
[jx
* 256 + ix
] = place ();
126 return check (ary
, size
, 1, 0, 1);
129 int gang_vector_2b (int *ary
, int size
)
135 #pragma acc parallel vector_length(32) num_gangs (32) copy (ary[0:size]) firstprivate (size)
137 #pragma acc loop tile(64, 64) gang vector
138 for (int jx
= 0; jx
< size
; jx
+= 256)
139 for (int ix
= 0; ix
< 256; ix
++)
140 ary
[jx
+ ix
] = place ();
143 return check (ary
, size
, 1, 0, 1);
146 int worker_vector_2a (int *ary
, int size
)
152 #pragma acc parallel vector_length(32) num_workers (32) copy (ary[0:size]) firstprivate (size)
154 #pragma acc loop tile(64, 64) worker vector
155 for (int jx
= 0; jx
< size
/ 256; jx
++)
156 for (int ix
= 0; ix
< 256; ix
++)
157 ary
[jx
* 256 + ix
] = place ();
160 return check (ary
, size
, 0, 1, 1);
163 int worker_vector_2b (int *ary
, int size
)
169 #pragma acc parallel vector_length(32) num_workers (32) copy (ary[0:size]) firstprivate (size)
171 #pragma acc loop tile(64, 64) worker vector
172 for (int jx
= 0; jx
< size
; jx
+= 256)
173 for (int ix
= 0; ix
< 256; ix
++)
174 ary
[jx
+ ix
] = place ();
177 return check (ary
, size
, 0, 1, 1);
180 int gang_worker_vector_2a (int *ary
, int size
)
185 #pragma acc parallel vector_length(32) num_workers (32) num_gangs(32) copy (ary[0:size]) firstprivate (size)
187 #pragma acc loop tile(32, 32)
188 for (int jx
= 0; jx
< size
/ 256; jx
++)
189 for (int ix
= 0; ix
< 256; ix
++)
190 ary
[jx
* 256 + ix
] = place ();
193 return check (ary
, size
, 1, 1, 1);
196 int gang_worker_vector_2b (int *ary
, int size
)
201 #pragma acc parallel vector_length(32) num_workers (32) num_gangs(32) copy (ary[0:size]) firstprivate (size)
203 #pragma acc loop tile(32, 32)
204 for (int jx
= 0; jx
< size
; jx
+= 256)
205 for (int ix
= 0; ix
< 256; ix
++)
206 ary
[jx
+ ix
] = place ();
209 return check (ary
, size
, 1, 1, 1);
212 int gang_worker_vector_star_2a (int *ary
, int size
)
218 #pragma acc parallel vector_length(32) num_workers (32) num_gangs(32) copy (ary[0:size]) firstprivate (size)
220 #pragma acc loop tile(*, *)
221 for (int jx
= 0; jx
< size
/ 256; jx
++)
222 for (int ix
= 0; ix
< 256; ix
++)
223 ary
[jx
* 256 + ix
] = place ();
226 return check (ary
, size
, 1, 1, 1);
229 int gang_worker_vector_star_2b (int *ary
, int size
)
235 #pragma acc parallel vector_length(32) num_workers (32) num_gangs(32) copy (ary[0:size]) firstprivate (size)
237 #pragma acc loop tile(*, *)
238 for (int jx
= 0; jx
< size
; jx
+=256)
239 for (int ix
= 0; ix
< 256; ix
++)
240 ary
[jx
+ ix
] = place ();
243 return check (ary
, size
, 1, 1, 1);
246 #define N (32*32*32*8)
251 #pragma acc parallel copy(ondev)
253 ondev
= acc_on_device (acc_device_not_host
);
259 if (gang_vector_1 (ary
, N
))
261 if (gang_vector_2a (ary
, N
))
263 if (worker_vector_2a (ary
, N
))
265 if (gang_worker_vector_2a (ary
, N
))
267 if (gang_worker_vector_star_2a (ary
, N
))
269 if (gang_vector_2b (ary
, N
))
271 if (worker_vector_2b (ary
, N
))
273 if (gang_worker_vector_2b (ary
, N
))
275 if (gang_worker_vector_star_2b (ary
, N
))