1 /* This code uses nvptx inline assembly guarded with acc_on_device, which is
2 not optimized away at -O0, and then confuses the target assembler.
3 { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
/* Trip count shared by both loops below: 32*32*32 plus 17 extra iterations.
   It is also used to derive the chunk size (N + 31) / 32 in the second
   loop.  */
7 #define N (32*32*32+17)
/* Offloaded region requesting 32 gangs; ondev is copied to the device and
   back.  */
14 #pragma acc parallel num_gangs(32) copy(ondev)
/* Gang-partitioned loop with a sum reduction into t.  */
16 #pragma acc loop gang reduction (+:t)
17 for (unsigned ix
= 0; ix
< N
; ix
++)
/* Builtin form of acc_on_device.  NOTE(review): the argument 5 is presumably
   the nvptx/NVIDIA device type mentioned in the header comment -- confirm
   against the device-type enum in openacc.h.  */
21 if (__builtin_acc_on_device (5))
/* g, w, v start at 0 and are overwritten by the asm statements below
   (presumably gang, worker and vector indices -- confirm).  */
23 int g
= 0, w
= 0, v
= 0;
/* Read the PTX special registers %ctaid.x, %tid.y and %tid.x into g, w
   and v respectively.  */
25 __asm__
volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g
));
26 __asm__
volatile ("mov.u32 %0,%%tid.y;" : "=r" (w
));
27 __asm__
volatile ("mov.u32 %0,%%tid.x;" : "=r" (v
));
/* Pack the three indices into one word: g shifted left by 16, w shifted
   left by 8, v in the low bits.  */
28 val
= (g
<< 16) | (w
<< 8) | v
;
/* Second pass over the same N iterations.  NOTE(review): this looks like
   the recomputation of the reference value, but the surrounding lines are
   not visible here -- confirm.  */
35 for (ix
= 0; ix
< N
; ix
++)
/* g for iteration ix: the iteration space is divided into chunks of
   (N + 31) / 32 iterations, i.e. N / 32 rounded up.  */
40 int g
= ix
/ ((N
+ 31) / 32);
/* Same bit packing as in the offloaded loop; w and v come from declarations
   outside this excerpt.  */
44 val
= (g
<< 16) | (w
<< 8) | v
;
/* Print t and the expected value h, both in hexadecimal.  */
50 printf ("t=%x expected %x\n", t
, h
);