aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/i386/i386-expand.c
diff options
context:
space:
mode:
author: Jakub Jelinek <jakub@redhat.com> 2020-04-07 08:27:49 +0200
committer: Jakub Jelinek <jakub@redhat.com> 2020-04-07 08:27:49 +0200
commit: bee27152f7e6651f38c25ac68db13370382147e0 (patch)
tree: 089a85d991c57f6e359ef13e65d949642dd4dc54 /gcc/config/i386/i386-expand.c
parent: 467fc7c83abfe8fca8b75defac7c89f6c75bf9d7 (diff)
i386: Fix emit_reduc_half on V{64Q,32H}Imode [PR94500]
The following testcase is miscompiled in 8.x, because for 512-bit modes emit_reduc_half is only prepared to handle i equal to 512, 256, 128 and 64. V32HImode also needs i equal to 32, and V64QImode needs i equal to 32 and 16, but in those cases emit_reduc_half performs a redundant permutation exactly like i == 64. In 9+ the testcase works because Richard in r9-3393 changed the reduc_* expanders so that they actually don't call ix86_expand_reduc on 512-bit modes, but only on 128-bit ones. The patch fixes emit_reduc_half to also handle i of 32 and 16, similarly to how V32QImode/V16HImode are handled for AVX2. I think it shouldn't hurt to fix the function even on the trunk and the 9 branch, even when nothing uses it ATM. 2020-04-07 Jakub Jelinek <jakub@redhat.com> PR target/94500 * config/i386/i386-expand.c (emit_reduc_half): For V{64QI,32HI}mode handle i < 64 using avx512bw_lshrv4ti3. Formatting fixes. * gcc.target/i386/avx512bw-pr94500.c: New test.
Diffstat (limited to 'gcc/config/i386/i386-expand.c')
-rw-r--r-- gcc/config/i386/i386-expand.c 66
1 files changed, 37 insertions, 29 deletions
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 8e623b3707f..066de99e660 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -14891,43 +14891,51 @@ emit_reduc_half (rtx dest, rtx src, int i)
break;
case E_V64QImode:
case E_V32HImode:
+ if (i < 64)
+ {
+ d = gen_reg_rtx (V4TImode);
+ tem = gen_avx512bw_lshrv4ti3 (d, gen_lowpart (V4TImode, src),
+ GEN_INT (i / 2));
+ break;
+ }
+ /* FALLTHRU */
case E_V16SImode:
case E_V16SFmode:
case E_V8DImode:
case E_V8DFmode:
if (i > 128)
tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
- gen_lowpart (V16SImode, src),
- gen_lowpart (V16SImode, src),
- GEN_INT (0x4 + (i == 512 ? 4 : 0)),
- GEN_INT (0x5 + (i == 512 ? 4 : 0)),
- GEN_INT (0x6 + (i == 512 ? 4 : 0)),
- GEN_INT (0x7 + (i == 512 ? 4 : 0)),
- GEN_INT (0xC), GEN_INT (0xD),
- GEN_INT (0xE), GEN_INT (0xF),
- GEN_INT (0x10), GEN_INT (0x11),
- GEN_INT (0x12), GEN_INT (0x13),
- GEN_INT (0x14), GEN_INT (0x15),
- GEN_INT (0x16), GEN_INT (0x17));
+ gen_lowpart (V16SImode, src),
+ gen_lowpart (V16SImode, src),
+ GEN_INT (0x4 + (i == 512 ? 4 : 0)),
+ GEN_INT (0x5 + (i == 512 ? 4 : 0)),
+ GEN_INT (0x6 + (i == 512 ? 4 : 0)),
+ GEN_INT (0x7 + (i == 512 ? 4 : 0)),
+ GEN_INT (0xC), GEN_INT (0xD),
+ GEN_INT (0xE), GEN_INT (0xF),
+ GEN_INT (0x10), GEN_INT (0x11),
+ GEN_INT (0x12), GEN_INT (0x13),
+ GEN_INT (0x14), GEN_INT (0x15),
+ GEN_INT (0x16), GEN_INT (0x17));
else
tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
- gen_lowpart (V16SImode, src),
- GEN_INT (i == 128 ? 0x2 : 0x1),
- GEN_INT (0x3),
- GEN_INT (0x3),
- GEN_INT (0x3),
- GEN_INT (i == 128 ? 0x6 : 0x5),
- GEN_INT (0x7),
- GEN_INT (0x7),
- GEN_INT (0x7),
- GEN_INT (i == 128 ? 0xA : 0x9),
- GEN_INT (0xB),
- GEN_INT (0xB),
- GEN_INT (0xB),
- GEN_INT (i == 128 ? 0xE : 0xD),
- GEN_INT (0xF),
- GEN_INT (0xF),
- GEN_INT (0xF));
+ gen_lowpart (V16SImode, src),
+ GEN_INT (i == 128 ? 0x2 : 0x1),
+ GEN_INT (0x3),
+ GEN_INT (0x3),
+ GEN_INT (0x3),
+ GEN_INT (i == 128 ? 0x6 : 0x5),
+ GEN_INT (0x7),
+ GEN_INT (0x7),
+ GEN_INT (0x7),
+ GEN_INT (i == 128 ? 0xA : 0x9),
+ GEN_INT (0xB),
+ GEN_INT (0xB),
+ GEN_INT (0xB),
+ GEN_INT (i == 128 ? 0xE : 0xD),
+ GEN_INT (0xF),
+ GEN_INT (0xF),
+ GEN_INT (0xF));
break;
default:
gcc_unreachable ();