summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSanjay Patel <spatel@rotateright.com>2017-08-31 15:57:17 +0000
committerSanjay Patel <spatel@rotateright.com>2017-08-31 15:57:17 +0000
commit001a6343a095e3dcfcd088d9d3546deb36bdc5fa (patch)
tree24e2abf201a92755fcf32ae35bbc163ae0631c89
parent44c3f5f86f9ffa2f42d2f59558deeb42621b190b (diff)
[InstCombine] improve demanded vector elements analysis of insertelement
Recurse instead of returning on the first found optimization. Also, return early in the caller instead of continuing because that allows another round of simplification before we might potentially lose undef information from a shuffle mask by eliminating the shuffle. As noted in the review, we could probably do better and be more efficient by moving all of demanded elements into a separate pass, but this is yet another quick fix to instcombine. Differential Revision: https://reviews.llvm.org/D37236
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp19
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp4
-rw-r--r--llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll5
-rw-r--r--llvm/test/Transforms/InstCombine/vec_demanded_elts.ll17
4 files changed, 19 insertions, 26 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index cda2ce6c91c..7d5d28f6fc4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -993,22 +993,23 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
break;
}
+ // The element inserted overwrites whatever was there, so the input demanded
+ // set is simpler than the output set.
+ unsigned IdxNo = Idx->getZExtValue();
+ APInt PreInsertDemandedElts = DemandedElts;
+ if (IdxNo < VWidth)
+ PreInsertDemandedElts.clearBit(IdxNo);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), PreInsertDemandedElts,
+ UndefElts, Depth + 1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+
// If this is inserting an element that isn't demanded, remove this
// insertelement.
- unsigned IdxNo = Idx->getZExtValue();
if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
Worklist.Add(I);
return I->getOperand(0);
}
- // Otherwise, the element inserted overwrites whatever was there, so the
- // input demanded set is simpler than the output set.
- APInt DemandedElts2 = DemandedElts;
- DemandedElts2.clearBit(IdxNo);
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
- UndefElts, Depth + 1);
- if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
-
// The inserted element is defined.
UndefElts.clearBit(IdxNo);
break;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 7380ec21cae..0d327795acf 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1165,9 +1165,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
if (V != &SVI)
return replaceInstUsesWith(SVI, V);
- LHS = SVI.getOperand(0);
- RHS = SVI.getOperand(1);
- MadeChange = true;
+ return &SVI;
}
unsigned LHSWidth = LHS->getType()->getVectorNumElements();
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll b/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll
index f181ef57fe2..d3ffd178010 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll
@@ -485,9 +485,8 @@ define <16 x i8> @demanded_elts_insertion(<16 x i8> %InVec, <16 x i8> %BaseMask,
define <32 x i8> @demanded_elts_insertion_avx2(<32 x i8> %InVec, <32 x i8> %BaseMask, i8 %M0, i8 %M22) {
; CHECK-LABEL: @demanded_elts_insertion_avx2(
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <32 x i8> %BaseMask, i8 %M0, i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> [[TMP1]])
-; CHECK-NEXT: ret <32 x i8> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> %BaseMask)
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
;
%1 = insertelement <32 x i8> %BaseMask, i8 %M0, i32 0
%2 = insertelement <32 x i8> %1, i8 %M22, i32 22
diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
index bc935c89bb9..9d59efbad73 100644
--- a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -142,13 +142,11 @@ define <2 x i64> @PR24922(<2 x i64> %v) {
ret <2 x i64> %result
}
-; FIXME: The shuffle only demands the 0th (undef) element of 'out123', so everything should fold away.
+; The shuffle only demands the 0th (undef) element of 'out123', so everything should fold away.
define <4 x float> @inselt_shuf_no_demand(float %a1, float %a2, float %a3) {
; CHECK-LABEL: @inselt_shuf_no_demand(
-; CHECK-NEXT: [[OUT1:%.*]] = insertelement <4 x float> undef, float %a1, i32 1
-; CHECK-NEXT: [[OUT12:%.*]] = insertelement <4 x float> [[OUT1]], float %a2, i32 2
-; CHECK-NEXT: ret <4 x float> [[OUT12]]
+; CHECK-NEXT: ret <4 x float> undef
;
%out1 = insertelement <4 x float> undef, float %a1, i32 1
%out12 = insertelement <4 x float> %out1, float %a2, i32 2
@@ -157,13 +155,11 @@ define <4 x float> @inselt_shuf_no_demand(float %a1, float %a2, float %a3) {
ret <4 x float> %shuffle
}
-; FIXME: The shuffle only demands the 0th (undef) element of 'out123', so everything should fold away.
+; The shuffle only demands the 0th (undef) element of 'out123', so everything should fold away.
define <4 x float> @inselt_shuf_no_demand_commute(float %a1, float %a2, float %a3) {
; CHECK-LABEL: @inselt_shuf_no_demand_commute(
-; CHECK-NEXT: [[OUT1:%.*]] = insertelement <4 x float> undef, float %a1, i32 1
-; CHECK-NEXT: [[OUT12:%.*]] = insertelement <4 x float> [[OUT1]], float %a2, i32 2
-; CHECK-NEXT: ret <4 x float> [[OUT12]]
+; CHECK-NEXT: ret <4 x float> undef
;
%out1 = insertelement <4 x float> undef, float %a1, i32 1
%out12 = insertelement <4 x float> %out1, float %a2, i32 2
@@ -172,15 +168,14 @@ define <4 x float> @inselt_shuf_no_demand_commute(float %a1, float %a2, float %a
ret <4 x float> %shuffle
}
-; FIXME: The add uses 'out012' giving it multiple uses after the shuffle is transformed to also
+; The add uses 'out012' giving it multiple uses after the shuffle is transformed to also
; use 'out012'. The analysis should be able to see past that.
define <4 x i32> @inselt_shuf_no_demand_multiuse(i32 %a0, i32 %a1, <4 x i32> %b) {
; CHECK-LABEL: @inselt_shuf_no_demand_multiuse(
; CHECK-NEXT: [[OUT0:%.*]] = insertelement <4 x i32> undef, i32 %a0, i32 0
; CHECK-NEXT: [[OUT01:%.*]] = insertelement <4 x i32> [[OUT0]], i32 %a1, i32 1
-; CHECK-NEXT: [[OUT012:%.*]] = insertelement <4 x i32> [[OUT01]], i32 %a0, i32 2
-; CHECK-NEXT: [[FOO:%.*]] = add <4 x i32> [[OUT012]], %b
+; CHECK-NEXT: [[FOO:%.*]] = add <4 x i32> [[OUT01]], %b
; CHECK-NEXT: ret <4 x i32> [[FOO]]
;
%out0 = insertelement <4 x i32> undef, i32 %a0, i32 0