/* Source: official-gcc.git,
   libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c
   (blob 419bc33ad536ee2cbd690f81957f4f6508eeee7f).
   Repository view heading: "Set num_threads to 50 on 32-bit hppa in two
   libgomp loop tests".  */
/* { dg-additional-options "-fopenacc-dim=16:16" } */
#include <openacc.h>
#include <string.h>
#include <stdio.h>
#include <gomp-constants.h>
8 #pragma acc routine
9 static int __attribute__ ((noinline)) coord ()
11 int res = 0;
13 if (acc_on_device (acc_device_not_host))
15 int g, w, v;
17 g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
18 w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
19 v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
20 res = (1 << 24) | (g << 16) | (w << 8) | v;
22 return res;
/* Validate an array of coordinates as encoded by coord ().

   ARY   - SIZE encoded values: marker in bits 24 and up, gang id in
	   bits 16-23, worker id in bits 8-15, vector id in bits 0-7.
   SIZE  - number of elements in ARY.
   GP    - number of gangs expected (valid gang ids are 0 .. GP-1).
   WP    - number of workers expected (valid worker ids are 0 .. WP-1).
   VP    - vector length expected (valid vector ids are 0 .. VP-1).

   Returns 0 when the accumulated marker count is zero (nothing was
   offloaded, so there is nothing to verify) or when every element is
   in range and every gang, worker and vector index was used the same
   number of times as index 0.  Returns 1, after printing a diagnostic
   to stdout, when the marker count differs from SIZE, when an element
   is out of range, or when the usage counts are uneven.  */
int check (const int *ary, int size, int gp, int wp, int vp)
{
  int failed = 0;
  int ix;
  /* Per-dimension usage histograms, kept on the stack.  */
  int *gangs = (int *)__builtin_alloca (gp * sizeof (int));
  int *workers = (int *)__builtin_alloca (wp * sizeof (int));
  int *vectors = (int *)__builtin_alloca (vp * sizeof (int));
  int offloaded = 0;

  memset (gangs, 0, gp * sizeof (int));
  memset (workers, 0, wp * sizeof (int));
  memset (vectors, 0, vp * sizeof (int));

  for (ix = 0; ix < size; ix++)
    {
      int g = (ary[ix] >> 16) & 0xff;
      int w = (ary[ix] >> 8) & 0xff;
      int v = (ary[ix] >> 0) & 0xff;

      if (g >= gp || w >= wp || v >= vp)
	{
	  printf ("unexpected cpu %#x used\n", ary[ix]);
	  failed = 1;
	}
      else
	{
	  vectors[v]++;
	  workers[w]++;
	  gangs[g]++;
	}

      /* Accumulate the marker that coord () places at bit 24.  */
      offloaded += ary[ix] >> 24;
    }

  /* Nothing carried the marker: no partitioning to check.  */
  if (!offloaded)
    return 0;

  if (offloaded != size)
    {
      printf ("offloaded %d times, expected %d\n", offloaded, size);
      return 1;
    }

  /* Each index of a dimension must match the count seen for index 0.  */
  for (ix = 0; ix < gp; ix++)
    if (gangs[ix] != gangs[0])
      {
	printf ("gang %d not used %d times\n", ix, gangs[0]);
	failed = 1;
      }

  for (ix = 0; ix < wp; ix++)
    if (workers[ix] != workers[0])
      {
	printf ("worker %d not used %d times\n", ix, workers[0]);
	failed = 1;
      }

  for (ix = 0; ix < vp; ix++)
    if (vectors[ix] != vectors[0])
      {
	printf ("vector %d not used %d times\n", ix, vectors[0]);
	failed = 1;
      }

  return failed;
}
/* Number of loop iterations (and result slots) used by test_1.  */
#define N (32 * 32 * 32)

/* Run three OpenACC parallel regions, partitioning the loop across
   gangs, then workers, then vectors, recording coord () for every
   iteration and validating the result with check ().

   GP, WP, VP - the gang count, worker count and vector length that
		check () should expect for the respective region; the
		two dimensions not exercised by a region are checked
		against 1.

   Returns 0 if all three checks pass, nonzero otherwise.  */
int test_1 (int gp, int wp, int vp)
{
  int ary[N];
  int failed = 0;

#pragma acc parallel copyout (ary)
  {
#pragma acc loop gang (static:1)
    for (int ix = 0; ix < N; ix++)
      ary[ix] = coord ();
  }

  failed |= check (ary, N, gp, 1, 1);

#pragma acc parallel copyout (ary)
  {
#pragma acc loop worker
    for (int ix = 0; ix < N; ix++)
      ary[ix] = coord ();
  }

  failed |= check (ary, N, 1, wp, 1);

#pragma acc parallel copyout (ary)
  {
#pragma acc loop vector
    for (int ix = 0; ix < N; ix++)
      ary[ix] = coord ();
  }

  failed |= check (ary, N, 1, 1, vp);

  return failed;
}
/* Test entry point: run test_1 with 16 gangs and 16 workers, and a
   vector length of 32, or 1 when ACC_DEVICE_TYPE_radeon is defined.
   The process exit status is test_1's result (0 on success).  */
int main ()
{
#ifdef ACC_DEVICE_TYPE_radeon
  /* AMD GCN uses the autovectorizer for the vector dimension: the use
     of a function call in vector-partitioned code in this test is not
     currently supported.  */
  return test_1 (16, 16, 1);
#else
  return test_1 (16, 16, 32);
#endif
}