Drop Z from X + Z < Y + Z
[official-gcc.git] / libgomp / testsuite / libgomp.oacc-c-c++-common / routine-wv-2.c
blob b5cbc9014d6567ba5e0ab2a8e2b48b3722f857d1
/* This code uses nvptx inline assembly guarded with acc_on_device, which is
   not optimized away at -O0, and then confuses the target assembler.
   { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
5 #include <stdio.h>
6 #include <openacc.h>
/* Upper bound on the worker counts that main iterates over.  */
#define NUM_WORKERS 16
/* vector_length requested for the parallel region, and the modulus used
   when computing the expected vector id.  */
#define NUM_VECTORS 32
/* Dimensions of the test array: HEIGHT rows of WIDTH ints.  */
#define WIDTH 64
#define HEIGHT 32
/* Worker id associated with index I.  When acc_on_device reports an
   NVIDIA device, the value is read from %tid.y through nvptx inline
   assembly; otherwise it is I modulo N.  Both arguments are
   parenthesized so that expression arguments (e.g. "a + b") expand
   with the intended precedence.  */
#define WORK_ID(I,N) \
  (acc_on_device (acc_device_nvidia) \
   ? ({unsigned __r; \
       __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (__r)); \
       __r; }) : ((I) % (N)))
/* Vector id associated with index I.  When acc_on_device reports an
   NVIDIA device, the value is read from %tid.x through nvptx inline
   assembly; otherwise it is I modulo N.  Both arguments are
   parenthesized so that expression arguments (e.g. "a + b") expand
   with the intended precedence.  */
#define VEC_ID(I,N) \
  (acc_on_device (acc_device_nvidia) \
   ? ({unsigned __r; \
       __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (__r)); \
       __r; }) : ((I) % (N)))
/* Fill the H-by-W array PTR (row-major, W ints per row) so that element
   [i][j] holds (WORK_ID (i, NW) << 8) | VEC_ID (j, NV).  The row loop is
   marked as a worker loop and the column loop as a vector loop; the
   function itself is an OpenACC worker routine and is kept out of line
   via the noinline attribute.  */
#pragma acc routine worker
void __attribute__ ((noinline))
WorkVec (int *ptr, int w, int h, int nw, int nv)
{
#pragma acc loop worker
  for (int i = 0; i < h; i++)
#pragma acc loop vector
    for (int j = 0; j < w; j++)
      ptr[i*w + j] = (WORK_ID (i, nw) << 8) | VEC_ID(j, nv);
}
35 int DoWorkVec (int nw)
37 int ary[HEIGHT][WIDTH];
38 int err = 0;
40 for (int ix = 0; ix != HEIGHT; ix++)
41 for (int jx = 0; jx != WIDTH; jx++)
42 ary[ix][jx] = 0xdeadbeef;
44 printf ("spawning %d ...", nw); fflush (stdout);
46 #pragma acc parallel num_workers(nw) vector_length (NUM_VECTORS) copy (ary)
48 WorkVec ((int *)ary, WIDTH, HEIGHT, nw, NUM_VECTORS);
51 for (int ix = 0; ix != HEIGHT; ix++)
52 for (int jx = 0; jx != WIDTH; jx++)
54 int exp = ((ix % nw) << 8) | (jx % NUM_VECTORS);
56 if (ary[ix][jx] != exp)
58 printf ("\nary[%d][%d] = %#x expected %#x", ix, jx,
59 ary[ix][jx], exp);
60 err = 1;
63 printf (err ? " failed\n" : " ok\n");
65 return err;
68 int main ()
70 int err = 0;
72 for (int W = 1; W <= NUM_WORKERS; W <<= 1)
73 err |= DoWorkVec (W);
75 return err;