2 /* { dg-additional-options "-O2" } */
6 #define N (32*32*32+17)
/* gang: OpenACC routine (declared with the `gang` clause, and kept
   out-of-line via the noinline attribute) that fills ARY, an array of N
   ints, from inside a `gang worker vector` partitioned loop.

   For every index ix in [0, N): when __builtin_acc_on_device (5) is
   nonzero, the element is set to a packed identifier
       (g << 16) | (w << 8) | v
   where g, w and v are read from %ctaid.x, %tid.y and %tid.x with
   `mov.u32` inline assembly.

   NOTE(review): the argument 5 is presumably the NVIDIA device-type
   enumerator -- confirm against openacc.h.
   NOTE(review): this listing shows no braces around the function, loop or
   `if` bodies, and no `else` arm; they appear to have been lost when the
   text was extracted -- confirm against the upstream file before relying
   on the statement nesting.  */
8 #pragma acc routine gang
9 void __attribute__ ((noinline
)) gang (int ary
[N
])
/* Partition the iterations across all three OpenACC parallelism levels.  */
11 #pragma acc loop gang worker vector
12 for (unsigned ix
= 0; ix
< N
; ix
++)
/* Only query the hardware registers when the builtin reports that we are
   executing on device type 5.  */
14 if (__builtin_acc_on_device (5))
16 int g
= 0, w
= 0, v
= 0;
/* The asm statements are volatile so the register reads are not removed or
   merged by the optimizer.  Judging by the variable names, %ctaid.x is
   taken as the gang index, %tid.y as the worker index and %tid.x as the
   vector lane -- TODO confirm this mapping for the target.  */
18 __asm__
volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g
));
19 __asm__
volatile ("mov.u32 %0,%%tid.y;" : "=r" (w
));
20 __asm__
volatile ("mov.u32 %0,%%tid.x;" : "=r" (v
));
/* Pack the three indices into one int: g from bit 16 up, w from bit 8 up,
   v in the low bits.  */
21 ary
[ix
] = (g
<< 16) | (w
<< 8) | v
;
35 for (ix
= 0; ix
< N
;ix
++)
38 #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev)
40 ondev
= __builtin_acc_on_device (5);
44 for (ix
= 0; ix
< N
; ix
++)
49 int chunk_size
= (N
+ 32*32*32 - 1) / (32*32*32);
51 int g
= ix
/ (chunk_size
* 32 * 32);
55 expected
= (g
<< 16) | (w
<< 8) | v
;
58 if (ary
[ix
] != expected
)
61 printf ("ary[%d]=%x expected %x\n", ix
, ary
[ix
], expected
);