1 /* This code uses nvptx inline assembly guarded with acc_on_device, which is
2 not optimized away at -O0, and then confuses the target assembler.
3 { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
7 #define N (32*32*32+17)
9 #pragma acc routine gang
10 void __attribute__ ((noinline
)) gang (int ary
[N
])
13 for (unsigned ix
= 0; ix
< N
; ix
++)
15 if (__builtin_acc_on_device (5))
17 int g
= 0, w
= 0, v
= 0;
19 __asm__
volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g
));
20 __asm__
volatile ("mov.u32 %0,%%tid.y;" : "=r" (w
));
21 __asm__
volatile ("mov.u32 %0,%%tid.x;" : "=r" (v
));
22 ary
[ix
] = (g
<< 16) | (w
<< 8) | v
;
36 for (ix
= 0; ix
< N
;ix
++)
39 #pragma acc parallel num_gangs(32) copy(ary) copy(ondev)
41 ondev
= __builtin_acc_on_device (5);
45 for (ix
= 0; ix
< N
; ix
++)
50 int g
= ix
/ ((N
+ 31) / 32);
54 expected
= (g
<< 16) | (w
<< 8) | v
;
57 if (ary
[ix
] != expected
)
60 printf ("ary[%d]=%x expected %x\n", ix
, ary
[ix
], expected
);