author    Philip Reames <preames@rivosinc.com>        2022-08-05 12:08:03 -0700
committer Philip Reames <listmail@philipreames.com>   2022-08-05 12:13:08 -0700
commit    9a9848f4b95895ad97d3dc117f0a94773dc1607f (patch)
tree      90a2243c732c5e15dd6f26df353173010c12dc3b
parent    99e50e583867ac35ace36f5da50b3a3ff7c51d2e (diff)
[RISCVInsertVSETVLI] Remove an unsound optimization
This fixes a bug reported privately by @craig.topper. Here's an example
which illustrates the problem:

  vsetvli a1, a0, e32, m1, ta, mu  # both DefInfo and PrevInfo
  vsetvli a2, a1, e32, m4, ta, mu

With the unsound result being:

  vsetvli a1, a0, e32, m1, ta, mu
  vsetvli a2, a0, e32, m4, ta, mu

Consider the case where this is running on a machine with VLEN=512. For
this case, the VLMAXs are 16 and 64 respectively. Consider a0 = 33. The
correct result is a1 = 16 and a2 = 16. After the unsound optimization:
a1 = 16 and a2 = 33.

This particular example used VLMAXs which differed by more than a power
of two. With a difference of only one power of two, there's another form
of this bug which involves the AVL < 2 x VLMAX special case, but that
one's more complicated to construct, as many examples turn out
accidentally sound.

This patch takes the approach of simply removing the unsound
optimization, but there are multiple sound sub-cases of it. I plan to
return to at least a couple of them, but figured it was cleaner to
remove the unsound optimization (for ease of backporting) and then
review the new optimizations on their own.

Differential Revision: https://reviews.llvm.org/D131264
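To make the arithmetic above concrete, here is a minimal, self-contained
C++ sketch (not LLVM code; computeVL and its clamp-to-VLMAX rule are
simplifying assumptions: they match the RVV spec exactly when
AVL <= VLMAX or AVL >= 2*VLMAX, which covers every value in this
example):

  #include <algorithm>
  #include <cstdint>
  #include <iostream>

  // Simplified vl computation: clamp AVL to VLMAX. This matches the RVV
  // spec when AVL <= VLMAX or AVL >= 2*VLMAX; for values in between the
  // spec allows other implementation choices, which this sketch ignores.
  static uint64_t computeVL(uint64_t AVL, uint64_t VLMAX) {
    return std::min(AVL, VLMAX);
  }

  int main() {
    const uint64_t VLEN = 512;
    const uint64_t VLMaxE32M1 = VLEN * 1 / 32; // 16
    const uint64_t VLMaxE32M4 = VLEN * 4 / 32; // 64

    const uint64_t a0 = 33;

    // Original sequence: the second vsetvli consumes a1, the clamped vl.
    const uint64_t a1 = computeVL(a0, VLMaxE32M1); // 16
    const uint64_t a2 = computeVL(a1, VLMaxE32M4); // 16

    // After the unsound rewrite, the second vsetvli sees a0 directly.
    const uint64_t a2Bad = computeVL(a0, VLMaxE32M4); // 33, not 16

    std::cout << "correct a2 = " << a2
              << ", rewritten a2 = " << a2Bad << "\n";
    return 0;
  }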
 llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp          | 25
 llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll |  2
 llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll         | 11
 3 files changed, 13 insertions(+), 25 deletions(-)
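The .cpp hunk below keeps only the second of the two original cases:
forwarding the defining vsetvli's AVL when the two states share a VLMAX
and that AVL is an immediate or x0. As a hedged illustration of the
surviving condition, using hypothetical stand-in types rather than
LLVM's actual VSETVLIInfo API:

  // Hypothetical stand-ins, not LLVM's real types. Since
  // VLMAX = VLEN * LMUL / SEW, two states with the same SEW:LMUL ratio
  // have the same VLMAX on any implementation.
  struct AVLState {
    bool HasAVLImm;        // AVL is an immediate (vsetivli form)
    bool AVLIsX0;          // AVL register is x0, i.e. "use VLMAX"
    unsigned SEWLMULRatio; // encodes SEW:LMUL; equal ratios imply
                           // equal VLMAX for any VLEN
  };

  // Immediates and x0 can be re-materialized at the use, so forwarding
  // them never extends a virtual register's live range.
  static bool canForwardAVL(const AVLState &Def, const AVLState &Use) {
    return Def.SEWLMULRatio == Use.SEWLMULRatio &&
           (Def.HasAVLImm || Def.AVLIsX0);
  }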
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index fc0a983f6542..5d9bd2f67558 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1022,16 +1022,10 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, const MachineInstr &M
return;
}
- // Two cases involving an AVL resulting from a previous vsetvli.
- // 1) If the AVL is the result of a previous vsetvli which has the
- // same AVL and VLMAX as our current state, we can reuse the AVL
- // from the current state for the new one. This allows us to
- // generate 'vsetvli x0, x0, vtype" or possible skip the transition
- // entirely.
- // 2) If AVL is defined by a vsetvli with the same VLMAX, we can
- // replace the AVL operand with the AVL of the defining vsetvli.
- // We avoid general register AVLs to avoid extending live ranges
- // without being sure we can kill the original source reg entirely.
+ // If AVL is defined by a vsetvli with the same VLMAX, we can
+ // replace the AVL operand with the AVL of the defining vsetvli.
+ // We avoid general register AVLs to avoid extending live ranges
+ // without being sure we can kill the original source reg entirely.
if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual())
return;
MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg());
@@ -1039,17 +1033,6 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, const MachineInstr &M
return;
VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
- // case 1
- if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
- DefInfo.hasSameAVL(PrevInfo) &&
- DefInfo.hasSameVLMAX(PrevInfo)) {
- if (PrevInfo.hasAVLImm())
- Info.setAVLImm(PrevInfo.getAVLImm());
- else
- Info.setAVLReg(PrevInfo.getAVLReg());
- return;
- }
- // case 2
if (DefInfo.hasSameVLMAX(Info) &&
(DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
if (DefInfo.hasAVLImm())
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
index 2184925214c2..65d1bf6a026f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -837,7 +837,7 @@ define <vscale x 2 x i32> @pre_lmul(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a0, 1
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu
-; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: beqz a1, .LBB18_2
; CHECK-NEXT: # %bb.1: # %if
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
index d3c3712f889f..728139b75e6b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
@@ -18,6 +18,7 @@ declare <vscale x 1 x i64> @llvm.riscv.vle.mask.nxv1i64(
define <vscale x 1 x double> @test1(i64 %avl, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32, mf2, ta, mu
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: ret
@@ -412,7 +413,7 @@ define i64 @avl_forward1b_neg(<vscale x 2 x i32> %v, <vscale x 2 x i32>* %p) nou
; CHECK-LABEL: avl_forward1b_neg:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli a1, 6, e16, m1, ta, mu
-; CHECK-NEXT: vsetivli zero, 6, e32, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: ret
@@ -467,6 +468,7 @@ entry:
define void @avl_forward4(<vscale x 2 x i32> %v, <vscale x 2 x i32>* %p, i64 %reg) nounwind {
; CHECK-LABEL: avl_forward4:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a1, a1, e16, m1, ta, mu
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
@@ -480,10 +482,10 @@ entry:
define i64 @avl_forward4b(<vscale x 2 x i32> %v, <vscale x 2 x i32>* %p, i64 %reg) nounwind {
; CHECK-LABEL: avl_forward4b:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a1, e16, m1, ta, mu
+; CHECK-NEXT: vsetvli a1, a1, e16, m1, ta, mu
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: ret
entry:
%vl = tail call i64 @llvm.riscv.vsetvli(i64 %reg, i64 1, i64 0)
@@ -496,6 +498,7 @@ entry:
define <vscale x 1 x i64> @vleNff(i64* %str, i64 %n, i64 %x) {
; CHECK-LABEL: vleNff:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a1, a1, e8, m4, ta, mu
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
; CHECK-NEXT: vle64ff.v v8, (a0)
; CHECK-NEXT: vsetvli zero, zero, e64, m1, tu, mu
@@ -516,6 +519,7 @@ entry:
define <vscale x 1 x i64> @vleNff2(i64* %str, i64 %n, i64 %x) {
; CHECK-LABEL: vleNff2:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a1, a1, e8, m4, ta, mu
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
; CHECK-NEXT: vle64ff.v v8, (a0)
; CHECK-NEXT: vadd.vx v8, v8, a2
@@ -541,6 +545,7 @@ define <vscale x 2 x i32> @avl_forward5(<vscale x 2 x i32>* %addr) {
; CHECK-LABEL: avl_forward5:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 32
+; CHECK-NEXT: vsetvli a1, a1, e8, m4, ta, mu
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret