2 /* { dg-additional-options "-O2" */
6 #define N (32*32*32+17)
14 for (ix
= 0; ix
< N
;ix
++)
17 #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev)
19 #pragma acc loop gang worker vector
20 for (unsigned ix
= 0; ix
< N
; ix
++)
22 if (__builtin_acc_on_device (5))
24 int g
= 0, w
= 0, v
= 0;
26 __asm__
volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g
));
27 __asm__
volatile ("mov.u32 %0,%%tid.y;" : "=r" (w
));
28 __asm__
volatile ("mov.u32 %0,%%tid.x;" : "=r" (v
));
29 ary
[ix
] = (g
<< 16) | (w
<< 8) | v
;
37 for (ix
= 0; ix
< N
; ix
++)
42 int chunk_size
= (N
+ 32*32*32 - 1) / (32*32*32);
44 int g
= ix
/ (chunk_size
* 32 * 32);
48 expected
= (g
<< 16) | (w
<< 8) | v
;
51 if (ary
[ix
] != expected
)
54 printf ("ary[%d]=%x expected %x\n", ix
, ary
[ix
], expected
);