/* { dg-options "-O2" } */
/* The nvptx backend used to emit lo/ls/hs/hi suffixes on unsigned comparison
   insns instead of the more common lt/le/ge/gt, but ptxas and PTX JIT
   miscompile 'ls' and 'hi' under some circumstances, such as when the first
   source operand expands to a constant memory load, as demonstrated below.
   Reported as NVIDIA bug ID 1725195 (tracker is not public).  */
/* Define this to observe PTX translation breakage.  */
//#define EMIT_BROKEN_ASM 1
/* Or define this to get expected codegen.  */
//#define EMIT_WORKING_ASM 1
16 static __attribute__((noinline
,noclone
)) int ls(unsigned a
)
19 /* %nctaid.x is always 1 in gcc testing. */
20 asm ("mov.u32 %0, %%nctaid.x;" : "=r"(v
));
21 #if defined(EMIT_BROKEN_ASM)
22 asm ("set.u32.ls.u32 %0, %1, %0;" : "+r"(a
) : "r"(v
));
23 #elif defined(EMIT_WORKING_ASM)
24 asm ("set.u32.le.u32 %0, %1, %0;" : "+r"(a
) : "r"(v
));
30 static __attribute__((noinline
,noclone
)) int hi(unsigned a
)
33 asm ("mov.u32 %0, %%nctaid.x;" : "=r"(v
));
34 #if defined(EMIT_BROKEN_ASM)
35 asm ("set.u32.hi.u32 %0, %1, %0;" : "+r"(a
) : "r"(v
));
36 #elif defined(EMIT_WORKING_ASM)
37 asm ("set.u32.gt.u32 %0, %1, %0;" : "+r"(a
) : "r"(v
));
47 if (ls(i
) != -(1 <= i
) || hi(i
) != -(1 > i
))