diff options
author | Jonas Paulsson <paulsson@linux.vnet.ibm.com> | 2018-11-13 08:37:09 +0000 |
---|---|---|
committer | Jonas Paulsson <paulsson@linux.vnet.ibm.com> | 2018-11-13 08:37:09 +0000 |
commit | 4cc00e863e2952df51aac8cd6fbc25b74fba0794 (patch) | |
tree | c425a1cd39efc5f55a44a982bc7ce031cb80fb4a /test | |
parent | 01a109e15c61e3bf86c5b35f4b035185a6c4d48e (diff) |
[SystemZ] Increase the number of VLREPs (linaro-local/diana.picus/a)
If a loaded value is replicated, it is best to combine the two operations
into a VLREP (load and replicate), but instruction selection (isel) will not
produce this if the load has other users as well.
This patch handles this by making the other users of the load use element 0
of the REPLICATE instead of the load. This way the load has the REPLICATE
node as its only user, and we get a VLREP.
Review: Ulrich Weigand
https://reviews.llvm.org/D54264
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@346746 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/SystemZ/vec-move-21.ll | 56 | ||||
-rw-r--r-- | test/CodeGen/SystemZ/vec-move-22.ll | 15 |
2 files changed, 71 insertions, 0 deletions
diff --git a/test/CodeGen/SystemZ/vec-move-21.ll b/test/CodeGen/SystemZ/vec-move-21.ll new file mode 100644 index 00000000000..47ad0371743 --- /dev/null +++ b/test/CodeGen/SystemZ/vec-move-21.ll @@ -0,0 +1,56 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +; Test that a replicate of a load gets folded to vlrep also in cases where +; the load has multiple users. + +; CHECK-NOT: vrep + + +define double @fun(double* %Vsrc, <2 x double> %T) { +entry: + %Vgep1 = getelementptr double, double* %Vsrc, i64 0 + %Vld1 = load double, double* %Vgep1 + %Vgep2 = getelementptr double, double* %Vsrc, i64 1 + %Vld2 = load double, double* %Vgep2 + %Vgep3 = getelementptr double, double* %Vsrc, i64 2 + %Vld3 = load double, double* %Vgep3 + %Vgep4 = getelementptr double, double* %Vsrc, i64 3 + %Vld4 = load double, double* %Vgep4 + %Vgep5 = getelementptr double, double* %Vsrc, i64 4 + %Vld5 = load double, double* %Vgep5 + %Vgep6 = getelementptr double, double* %Vsrc, i64 5 + %Vld6 = load double, double* %Vgep6 + + %V19 = insertelement <2 x double> undef, double %Vld1, i32 0 + %V20 = shufflevector <2 x double> %V19, <2 x double> undef, <2 x i32> zeroinitializer + %V21 = insertelement <2 x double> undef, double %Vld4, i32 0 + %V22 = insertelement <2 x double> %V21, double %Vld5, i32 1 + %V23 = fmul <2 x double> %V20, %V22 + %V24 = fadd <2 x double> %T, %V23 + %V25 = insertelement <2 x double> %V19, double %Vld2, i32 1 + %V26 = insertelement <2 x double> undef, double %Vld6, i32 0 + %V27 = insertelement <2 x double> %V26, double %Vld6, i32 1 + %V28 = fmul <2 x double> %V25, %V27 + %V29 = fadd <2 x double> %T, %V28 + %V30 = insertelement <2 x double> undef, double %Vld2, i32 0 + %V31 = shufflevector <2 x double> %V30, <2 x double> undef, <2 x i32> zeroinitializer + %V32 = insertelement <2 x double> undef, double %Vld5, i32 0 + %V33 = insertelement <2 x double> %V32, double %Vld6, i32 1 + %V34 = fmul <2 x double> %V31, %V33 + %V35 = fadd <2 x double> %T, %V34 + %V36 = 
insertelement <2 x double> undef, double %Vld3, i32 0 + %V37 = shufflevector <2 x double> %V36, <2 x double> undef, <2 x i32> zeroinitializer + %V38 = fmul <2 x double> %V37, %V22 + %V39 = fadd <2 x double> %T, %V38 + %Vmul37 = fmul double %Vld3, %Vld6 + %Vadd38 = fadd double %Vmul37, %Vmul37 + + %VA0 = fadd <2 x double> %V24, %V29 + %VA1 = fadd <2 x double> %VA0, %V35 + %VA2 = fadd <2 x double> %VA1, %V39 + + %VE0 = extractelement <2 x double> %VA2, i32 0 + %VS1 = fadd double %VE0, %Vadd38 + + ret double %VS1 +} diff --git a/test/CodeGen/SystemZ/vec-move-22.ll b/test/CodeGen/SystemZ/vec-move-22.ll new file mode 100644 index 00000000000..2508a9e6da2 --- /dev/null +++ b/test/CodeGen/SystemZ/vec-move-22.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +; Test that a loaded value which is used both in a vector and scalar context +; is not transformed to a vlrep + vlgvg. + +; CHECK-NOT: vlrep + +define void @fun(i64 %arg, i64** %Addr, <2 x i64*>* %Dst) { + %tmp10 = load i64*, i64** %Addr + store i64 %arg, i64* %tmp10 + %tmp12 = insertelement <2 x i64*> undef, i64* %tmp10, i32 0 + %tmp13 = insertelement <2 x i64*> %tmp12, i64* %tmp10, i32 1 + store <2 x i64*> %tmp13, <2 x i64*>* %Dst + ret void +} |