1 /* This code uses nvptx inline assembly guarded with acc_on_device, which is
2 not optimized away at -O0, and then confuses the target assembler.
3 { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
/* Number of array elements processed by the loops below: 32*32*32 + 17
   (32785).  The parallel region in this file requests num_gangs(32),
   num_workers(32) and vector_length(32), so N is deliberately 17 more than
   the product of those three values and is not an exact multiple of it.  */
7 #define N (32*32*32+17)
15 for (ix
= 0; ix
< N
;ix
++)
18 #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev)
20 #pragma acc loop gang worker vector
21 for (unsigned ix
= 0; ix
< N
; ix
++)
23 if (__builtin_acc_on_device (5))
25 int g
= 0, w
= 0, v
= 0;
27 __asm__
volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g
));
28 __asm__
volatile ("mov.u32 %0,%%tid.y;" : "=r" (w
));
29 __asm__
volatile ("mov.u32 %0,%%tid.x;" : "=r" (v
));
30 ary
[ix
] = (g
<< 16) | (w
<< 8) | v
;
38 for (ix
= 0; ix
< N
; ix
++)
43 int chunk_size
= (N
+ 32*32*32 - 1) / (32*32*32);
45 int g
= ix
/ (chunk_size
* 32 * 32);
49 expected
= (g
<< 16) | (w
<< 8) | v
;
52 if (ary
[ix
] != expected
)
55 printf ("ary[%d]=%x expected %x\n", ix
, ary
[ix
], expected
);