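/* NOTE(review): the three lines below look like page-header residue from a
   repository web view rather than part of the test; kept verbatim.
   * gcc-interface/trans.c (Call_to_gnu): If this is a function call and
   [official-gcc.git] / libgomp / testsuite / libgomp.oacc-c-c++-common / loop-auto-1.c
   blob863b6b38c34ba028797558e2626fadaeb0cd45b1  */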
/* This code uses nvptx inline assembly guarded with acc_on_device, which is
   not optimized away at -O0, and then confuses the target assembler.
   { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
/* { dg-additional-options "-fopenacc-dim=32" } */
#include <stdio.h>
#include <openacc.h>
/* Check one slot of one axis.  AXIS is the label used in diagnostics,
   COUNTS the per-index usage counts for that axis and IX the index being
   examined.  When PARTITIONED is nonzero, index IX must have been used
   exactly as often as index 0; otherwise no index other than 0 may have
   been used at all.  Prints a diagnostic and returns 1 on a mismatch,
   returns 0 otherwise.  */
static int check_slot (const char *axis, const int *counts, int ix,
                       int partitioned)
{
  if (partitioned)
    {
      int expect = counts[0];

      if (counts[ix] != expect)
        {
          printf ("%s %d not used %d times\n", axis, ix, expect);
          return 1;
        }
    }
  else if (ix && counts[ix])
    {
      printf ("%s %d unexpectedly used\n", axis, ix);
      return 1;
    }

  return 0;
}

/* Verify how the SIZE elements of ARY are distributed over gang, worker
   and vector indices.  Each element is decoded as: bits 0-7 vector index,
   bits 8-15 worker index, bits 16-23 gang index.

   GP, WP and VP select the expectation for the gang, worker and vector
   axis respectively: nonzero means every one of the 32 indices must have
   been used as often as index 0, zero means only index 0 may have been
   used.

   Returns 0 if every expectation holds and 1 otherwise; a diagnostic is
   printed for each violation.  */
int check (const int *ary, int size, int gp, int wp, int vp)
{
  enum { DIM = 32 };
  int failed = 0;
  int out_of_range = 0;
  int gangs[DIM] = { 0 }, workers[DIM] = { 0 }, vectors[DIM] = { 0 };

  for (int ix = 0; ix < size; ix++)
    {
      /* Decode through an unsigned value so that shifting an element that
         is still negative (e.g. the -1 fill value) is well defined.  */
      unsigned int id = (unsigned int) ary[ix];
      unsigned int v = id & 0xff;
      unsigned int w = (id >> 8) & 0xff;
      unsigned int g = (id >> 16) & 0xff;

      /* The masks allow values up to 255 but the tables only have DIM
         entries; treat anything larger as a failure instead of writing
         past the end of the arrays.  */
      if (v >= DIM || w >= DIM || g >= DIM)
        {
          out_of_range++;
          continue;
        }

      vectors[v]++;
      workers[w]++;
      gangs[g]++;
    }

  if (out_of_range)
    {
      failed = 1;
      printf ("%d elements hold out-of-range ids\n", out_of_range);
    }

  for (int ix = 0; ix < DIM; ix++)
    {
      failed |= check_slot ("gang", gangs, ix, gp);
      failed |= check_slot ("worker", workers, ix, wp);
      failed |= check_slot ("vector", vectors, ix, vp);
    }

  return failed;
}
77 #pragma acc routine seq
78 static int __attribute__((noinline)) place ()
80 int r = 0;
82 if (acc_on_device (acc_device_nvidia))
84 int g = 0, w = 0, v = 0;
86 __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
87 __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
88 __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
89 r = (g << 16) | (w << 8) | v;
91 return r;
/* Set each of the first SIZE elements of ARY to -1, so that elements a
   later loop fails to write remain recognisable.  Does nothing when SIZE
   is zero or negative.  */
static void clear (int *ary, int size)
{
  int ix;

  for (ix = 0; ix < size; ix++)
    ary[ix] = -1;
}
/* Scenario: a single-iteration `gang' loop wrapping an `auto' loop over
   all SIZE elements, in a parallel region with 32 workers and a vector
   length of 32.  ARY is reset first, every element is then assigned
   place (), and the result is checked expecting gang 0 only with workers
   and vectors evenly used.  Returns the value of check ().  */
int vector_1 (int *ary, int size)
{
  clear (ary, size);

#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
  {
#pragma acc loop gang
    for (int jx = 0; jx < 1; jx++)
#pragma acc loop auto
      for (int ix = 0; ix < size; ix++)
        ary[ix] = place ();
  }

  return check (ary, size, 0, 1, 1);
}
/* Scenario: a `worker' loop over SIZE / 64 chunks wrapping an `auto' loop
   over the 64 elements of each chunk, in a parallel region with 32
   workers and a vector length of 32.  ARY is reset first, each element is
   assigned place (), and the result is checked expecting gang 0 only with
   workers and vectors evenly used.  Returns the value of check ().  */
int vector_2 (int *ary, int size)
{
  clear (ary, size);

#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
  {
#pragma acc loop worker
    for (int jx = 0; jx < size / 64; jx++)
#pragma acc loop auto
      for (int ix = 0; ix < 64; ix++)
        ary[ix + jx * 64] = place ();
  }

  return check (ary, size, 0, 1, 1);
}
/* Scenario: a single-iteration `gang' loop, an `auto' loop over SIZE / 64
   chunks, and an innermost `vector' loop over the 64 elements of each
   chunk, in a parallel region with 32 workers and a vector length of 32.
   ARY is reset first, each element is assigned place (), and the result
   is checked expecting gang 0 only with workers and vectors evenly used.
   Returns the value of check ().  */
int worker_1 (int *ary, int size)
{
  clear (ary, size);

#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
  {
#pragma acc loop gang
    for (int kx = 0; kx < 1; kx++)
#pragma acc loop auto
      for (int jx = 0; jx < size / 64; jx++)
#pragma acc loop vector
        for (int ix = 0; ix < 64; ix++)
          ary[ix + jx * 64] = place ();
  }

  return check (ary, size, 0, 1, 1);
}
/* Scenario: an `auto' loop over SIZE / 64 chunks wrapping a `worker' loop
   over the 64 elements of each chunk, in a parallel region with 32 gangs,
   32 workers and a vector length of 32.  ARY is reset first, each element
   is assigned place (), and the result is checked expecting gangs and
   workers evenly used and vector 0 only.  Returns the value of
   check ().  */
int gang_1 (int *ary, int size)
{
  clear (ary, size);

#pragma acc parallel num_gangs (32) num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
  {
#pragma acc loop auto
    for (int jx = 0; jx < size / 64; jx++)
#pragma acc loop worker
      for (int ix = 0; ix < 64; ix++)
        ary[ix + jx * 64] = place ();
  }

  return check (ary, size, 1, 1, 0);
}
/* Scenario: three nested `auto' loops (SIZE / (32 * 32) outer iterations,
   then 32, then 32) in a parallel region with 32 gangs, 32 workers and a
   vector length of 32.  ARY is reset first, each element is assigned
   place (), and the result is checked expecting gangs, workers and
   vectors all evenly used.  Returns the value of check ().  */
int gang_2 (int *ary, int size)
{
  clear (ary, size);

#pragma acc parallel num_gangs (32) num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
  {
#pragma acc loop auto
    for (int kx = 0; kx < size / (32 * 32); kx++)
#pragma acc loop auto
      for (int jx = 0; jx < 32; jx++)
#pragma acc loop auto
        for (int ix = 0; ix < 32; ix++)
          ary[ix + jx * 32 + kx * 32 * 32] = place ();
  }

  return check (ary, size, 1, 1, 1);
}
/* Scenario: two nested `auto' loops (SIZE / 64 chunks of 64 elements) in
   a parallel region with 32 workers and a vector length of 32; no
   num_gangs clause is given.  ARY is reset first, each element is
   assigned place (), and the result is checked expecting gangs, workers
   and vectors all evenly used.  Returns the value of check ().  */
int gang_3 (int *ary, int size)
{
  clear (ary, size);

#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
  {
#pragma acc loop auto
    for (int jx = 0; jx < size / 64; jx++)
#pragma acc loop auto
      for (int ix = 0; ix < 64; ix++)
        ary[ix + jx * 64] = place ();
  }

  return check (ary, size, 1, 1, 1);
}
/* Scenario: a single `auto' loop over all SIZE elements in a parallel
   region that only specifies a vector length of 32.  ARY is reset first,
   each element is assigned place (), and the result is checked expecting
   gangs and vectors evenly used and worker 0 only.  Returns the value of
   check ().  */
int gang_4 (int *ary, int size)
{
  clear (ary, size);

#pragma acc parallel vector_length(32) copy(ary[0:size]) firstprivate (size)
  {
#pragma acc loop auto
    for (int jx = 0; jx < size; jx++)
      ary[jx] = place ();
  }

  return check (ary, size, 1, 0, 1);
}
216 #define N (32*32*32*2)
217 int main ()
219 int ondev = 0;
221 #pragma acc parallel copy(ondev)
223 ondev = acc_on_device (acc_device_not_host);
225 if (!ondev)
226 return 0;
228 int ary[N];
230 if (vector_1 (ary, N))
231 return 1;
232 if (vector_2 (ary, N))
233 return 1;
235 if (worker_1 (ary, N))
236 return 1;
238 if (gang_1 (ary, N))
239 return 1;
240 if (gang_2 (ary, N))
241 return 1;
242 if (gang_3 (ary, N))
243 return 1;
244 if (gang_4 (ary, N))
245 return 1;
247 return 0;