aboutsummaryrefslogtreecommitdiff
path: root/gcc/testsuite/gcc.target/nvptx/unsigned-cmp.c
blob: a0cf5c2d468033ce0081b653bc9539d8c9b69fa5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
/* { dg-options "-O2" } */
/* { dg-do run } */

/* The nvptx backend used to emit lo/ls/hs/hi suffixes on unsigned comparison
   insns instead of the more common lt/le/ge/gt, but ptxas and the PTX JIT
   miscompile 'ls' and 'hi' under some circumstances, such as when the first
   source operand expands to a constant memory load, as demonstrated below.
   Reported as NVIDIA bug ID 1725195 (the tracker is not public).  */

/* Define this to observe PTX translation breakage.  */
//#define EMIT_BROKEN_ASM 1

/* Or define this to get expected codegen.  */
//#define EMIT_WORKING_ASM 1

/* Unsigned "lower or same" check: return -1 if V <= A and 0 otherwise,
   where V is read from the PTX special register %nctaid.x and A is the
   caller-supplied argument.  V is deliberately the FIRST source operand of
   the comparison; that is the operand position the comment at the top of
   this file ties to the miscompilation.

   One of three bodies is selected at compile time:
     EMIT_BROKEN_ASM   hand-written 'set' using the 'ls' suffix;
     EMIT_WORKING_ASM  hand-written 'set' using the 'le' suffix;
     neither defined   plain C, so the compiler picks the suffix itself.

   NOTE(review): the asm variants presumably rely on 'set.u32' producing
   all-ones for a true comparison so that they match the C fallback's
   -1/0 result -- confirm against the PTX ISA.
   NOTE(review): noinline/noclone presumably stop the compiler from folding
   the comparison using the constant arguments passed by main -- confirm.  */
static __attribute__((noinline,noclone)) int ls(unsigned a)
{
    unsigned v;
    /* %nctaid.x is always 1 in gcc testing.  */
    asm ("mov.u32 %0, %%nctaid.x;" : "=r"(v));
#if defined(EMIT_BROKEN_ASM)
    /* Operand %0 is A, used both as destination and as second source;
       operand %1 is V, the first source.  */
    asm ("set.u32.ls.u32 %0, %1, %0;" : "+r"(a) : "r"(v));
#elif defined(EMIT_WORKING_ASM)
    /* Same operand layout as above, but with the 'le' spelling.  */
    asm ("set.u32.le.u32 %0, %1, %0;" : "+r"(a) : "r"(v));
#else
    /* A is unsigned, so -1 is stored as all-ones and read back as -1 through
       the int return type.  */
    a = v <= a ? -1 : 0;
#endif
    return a;
}
/* Unsigned "higher" check: return -1 if V > A and 0 otherwise, where V is
   read from the PTX special register %nctaid.x and A is the caller-supplied
   argument.  V is the first source operand of the comparison and A the
   second.

   One of three bodies is selected at compile time:
     EMIT_BROKEN_ASM   hand-written 'set' using the 'hi' suffix;
     EMIT_WORKING_ASM  hand-written 'set' using the 'gt' suffix;
     neither defined   plain C, so the compiler picks the suffix itself.

   NOTE(review): the asm variants presumably rely on 'set.u32' producing
   all-ones for a true comparison so that they match the C fallback's
   -1/0 result -- confirm against the PTX ISA.
   NOTE(review): noinline/noclone presumably stop the compiler from folding
   the comparison using the constant arguments passed by main -- confirm.  */
static __attribute__((noinline,noclone)) int hi(unsigned a)
{
    unsigned v;
    /* Load the value to compare against from the %nctaid.x special
       register.  */
    asm ("mov.u32 %0, %%nctaid.x;" : "=r"(v));
#if defined(EMIT_BROKEN_ASM)
    /* Operand %0 is A, used both as destination and as second source;
       operand %1 is V, the first source.  */
    asm ("set.u32.hi.u32 %0, %1, %0;" : "+r"(a) : "r"(v));
#elif defined(EMIT_WORKING_ASM)
    /* Same operand layout as above, but with the 'gt' spelling.  */
    asm ("set.u32.gt.u32 %0, %1, %0;" : "+r"(a) : "r"(v));
#else
    /* A is unsigned, so -1 is stored as all-ones and read back as -1 through
       the int return type.  */
    a = v > a ? -1 : 0;
#endif
    return a;
}
/* Test driver: feed 0, 1 and 2 to ls() and hi() and abort as soon as either
   result differs from the same comparison against the constant 1 evaluated
   in plain C.  Returns 0 when every check passes.  */
int main()
{
    int arg = 0;

    while (arg < 3) {
        /* Reference answers, using the same -1/0 encoding as ls() and hi().  */
        int want_ls = -(1 <= arg);
        int want_hi = -(1 > arg);

        /* ls() is checked first; hi() is only called if ls() was correct.  */
        if (ls(arg) != want_ls)
            __builtin_abort();
        if (hi(arg) != want_hi)
            __builtin_abort();

        arg++;
    }
    return 0;
}