2 /* { dg-additional-options "-O2" } */
/* Number of array elements processed by the loops in this file:
   32*32*32 + 17 = 32785, which is not a multiple of the vector_length(32)
   requested by the parallel region further down.  */
6 #define N (32*32*32+17)
/* OpenACC routine declared with the `vector' clause and marked noinline.
   It iterates ix over [0, N) under `#pragma acc loop vector' and, when
   __builtin_acc_on_device (5) is true, stores into ary[ix] a value packed
   as (g << 16) | (w << 8) | v, with g, w and v read by inline asm.

   ary: array of N ints, written in place.

   NOTE(review): this listing has gaps (the embedded line numbers jump and
   no braces are shown), so any branch taken when the on-device test is
   false is not visible here.  */
8 #pragma acc routine vector
9 void __attribute__ ((noinline
)) vector (int ary
[N
])
/* Ask for the iterations of the following loop to be shared at vector
   level.  */
11 #pragma acc loop vector
12 for (unsigned ix
= 0; ix
< N
; ix
++)
/* NOTE(review): device code 5 is presumably GCC's acc_device_nvidia, which
   would match the PTX-style asm below -- confirm against openacc.h.  */
14 if (__builtin_acc_on_device (5))
/* g, w, v presumably stand for gang, worker and vector indices; they are
   zero-initialised and then overwritten by the asm outputs below.  */
16 int g
= 0, w
= 0, v
= 0;
/* Read %ctaid.x into g.  */
18 __asm__
volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g
));
/* Read %tid.y into w.  */
19 __asm__
volatile ("mov.u32 %0,%%tid.y;" : "=r" (w
));
/* Read %tid.x into v.  */
20 __asm__
volatile ("mov.u32 %0,%%tid.x;" : "=r" (v
));
/* Pack the three values into one int: g shifted left by 16, w shifted left
   by 8, v unshifted, combined with bitwise OR.  */
21 ary
[ix
] = (g
<< 16) | (w
<< 8) | v
;
/* Host-side driver fragment.  NOTE(review): the enclosing function header,
   the declarations of ary, ix, ondev, expected, g, w and v, and several
   statements are not part of this listing (the embedded line numbers
   jump), so the comments below describe only what is visible.  */
/* Walk every index of ary; the statement executed per iteration is not
   shown in this listing.  */
35 for (ix
= 0; ix
< N
;ix
++)
/* Offloaded parallel region requesting a vector length of 32; the copy
   clauses name ary and ondev as data copied to and from the region.  */
38 #pragma acc parallel vector_length(32) copy(ary) copy(ondev)
/* Record whether the region is executing on device type 5, using the same
   test as the routine above.  */
40 ondev
= __builtin_acc_on_device (5);
/* Verification pass over every element of ary.  */
44 for (ix
= 0; ix
< N
; ix
++)
/* Rebuild the expected value with the same packing as the device routine:
   (g << 16) | (w << 8) | v.  How g, w and v are computed here is not
   visible in this listing.  */
53 expected
= (g
<< 16) | (w
<< 8) | v
;
/* Report any element that differs from the expected value.  */
56 if (ary
[ix
] != expected
)
59 printf ("ary[%d]=%x expected %x\n", ix
, ary
[ix
], expected
);