/* Add support for ARMv8-R architecture
   [official-gcc.git] / libgomp / testsuite / libgomp.oacc-c-c++-common / loop-dim-default.c
   blob e2b08c3e0bc65b294ffa61b43adbf8d41b32a08f  */
/* This code uses nvptx inline assembly guarded with acc_on_device, which is
   not optimized away at -O0, and then confuses the target assembler. */
/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
/* { dg-additional-options "-fopenacc-dim=16:16" } */
#include <openacc.h>
#include <alloca.h>
#include <string.h>
#include <stdio.h>
11 #pragma acc routine
12 static int __attribute__ ((noinline)) coord ()
14 int res = 0;
16 if (acc_on_device (acc_device_nvidia))
18 int g = 0, w = 0, v = 0;
20 __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
21 __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
22 __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
23 res = (1 << 24) | (g << 16) | (w << 8) | v;
25 return res;
/* Validate the SIZE encoded coordinates in ARY (as produced by coord).

   GP, WP and VP are the number of distinct gang, worker and vector indices
   that are expected to appear.  Each element is decoded as
   g = bits 16..23, w = bits 8..15, v = bits 0..7, and the value above
   bit 23 is accumulated as the "offloaded" count.

   Returns 0 when nothing was offloaded at all (every element contributed
   0 to the offloaded count), or when every element was offloaded, all
   indices were in range, and every gang/worker/vector index was used the
   same number of times.  Returns 1 otherwise, after printing a diagnostic
   for each problem found.  */
int check (const int *ary, int size, int gp, int wp, int vp)
{
  int failed = 0;
  int ix;
  /* Per-index usage counters, one array per partitioning dimension.  */
  int *gangs = (int *)alloca (gp * sizeof (int));
  int *workers = (int *)alloca (wp * sizeof (int));
  int *vectors = (int *)alloca (vp * sizeof (int));
  int offloaded = 0;

  memset (gangs, 0, gp * sizeof (int));
  memset (workers, 0, wp * sizeof (int));
  memset (vectors, 0, vp * sizeof (int));

  for (ix = 0; ix < size; ix++)
    {
      int g = (ary[ix] >> 16) & 0xff;
      int w = (ary[ix] >> 8) & 0xff;
      int v = (ary[ix] >> 0) & 0xff;

      if (g >= gp || w >= wp || v >= vp)
        {
          printf ("unexpected cpu %#x used\n", ary[ix]);
          failed = 1;
        }
      else
        {
          vectors[v]++;
          workers[w]++;
          gangs[g]++;
        }
      /* Counted for every element, including out-of-range ones.  */
      offloaded += ary[ix] >> 24;
    }

  /* Host fallback: nothing ran on the device, so there is nothing to
     verify about the launch geometry.  */
  if (!offloaded)
    return 0;

  if (offloaded != size)
    {
      printf ("offloaded %d times, expected %d\n", offloaded, size);
      return 1;
    }

  /* Every index of each dimension must have been used as often as
     index 0 of that dimension.  */
  for (ix = 0; ix < gp; ix++)
    if (gangs[ix] != gangs[0])
      {
        printf ("gang %d not used %d times\n", ix, gangs[0]);
        failed = 1;
      }

  for (ix = 0; ix < wp; ix++)
    if (workers[ix] != workers[0])
      {
        printf ("worker %d not used %d times\n", ix, workers[0]);
        failed = 1;
      }

  for (ix = 0; ix < vp; ix++)
    if (vectors[ix] != vectors[0])
      {
        printf ("vector %d not used %d times\n", ix, vectors[0]);
        failed = 1;
      }

  return failed;
}
/* Number of loop iterations, and elements in the result array, used by
   each parallel region in test_1.  */
#define N (32 * 32 * 32)
97 int test_1 (int gp, int wp, int vp)
99 int ary[N];
100 int exit = 0;
102 #pragma acc parallel copyout (ary)
104 #pragma acc loop gang (static:1)
105 for (int ix = 0; ix < N; ix++)
106 ary[ix] = coord ();
109 exit |= check (ary, N, gp, 1, 1);
111 #pragma acc parallel copyout (ary)
113 #pragma acc loop worker
114 for (int ix = 0; ix < N; ix++)
115 ary[ix] = coord ();
118 exit |= check (ary, N, 1, wp, 1);
120 #pragma acc parallel copyout (ary)
122 #pragma acc loop vector
123 for (int ix = 0; ix < N; ix++)
124 ary[ix] = coord ();
127 exit |= check (ary, N, 1, 1, vp);
129 return exit;
/* Entry point: the process exit status is the result of test_1, so 0 means
   the test passed.  The gang and worker counts of 16 match the
   -fopenacc-dim=16:16 option requested in the dg-additional-options
   directive at the top of the file.
   NOTE(review): the vector count of 32 is presumably the fixed nvptx
   vector length -- confirm against the target documentation.  */
int main ()
{
  return test_1 (16, 16, 32);
}