/* { dg-options "-O2" } */ /* { dg-do run } */ /* nvptx backend used to emit lo/ls/hs/hi suffixes on unsigned comparison insns instead of the more common lt/le/ge/gt, but ptxas and PTX JIT miscompile 'ls' and 'hi' under some circumstances, such as when the first source operand expands to a constant memory load, as demonstrated below. Reported as NVIDIA bug ID 1725195 (tracker is not public). */ /* Define this to observe PTX translation breakage. */ //#define EMIT_BROKEN_ASM 1 /* Or define this to get expected codegen. */ //#define EMIT_WORKING_ASM 1 static __attribute__((noinline,noclone)) int ls(unsigned a) { unsigned v; /* %nctaid.x is always 1 in gcc testing. */ asm ("mov.u32 %0, %%nctaid.x;" : "=r"(v)); #if defined(EMIT_BROKEN_ASM) asm ("set.u32.ls.u32 %0, %1, %0;" : "+r"(a) : "r"(v)); #elif defined(EMIT_WORKING_ASM) asm ("set.u32.le.u32 %0, %1, %0;" : "+r"(a) : "r"(v)); #else a = v <= a ? -1 : 0; #endif return a; } static __attribute__((noinline,noclone)) int hi(unsigned a) { unsigned v; asm ("mov.u32 %0, %%nctaid.x;" : "=r"(v)); #if defined(EMIT_BROKEN_ASM) asm ("set.u32.hi.u32 %0, %1, %0;" : "+r"(a) : "r"(v)); #elif defined(EMIT_WORKING_ASM) asm ("set.u32.gt.u32 %0, %1, %0;" : "+r"(a) : "r"(v)); #else a = v > a ? -1 : 0; #endif return a; } int main() { int i; for (i=0; i<3; i++) if (ls(i) != -(1 <= i) || hi(i) != -(1 > i)) __builtin_abort(); return 0; }