summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sanjay Patel <spatel@rotateright.com> 2018-10-01 14:40:00 +0000
committer Sanjay Patel <spatel@rotateright.com> 2018-10-01 14:40:00 +0000
commit aaa4b57f66861ad0afc044030ce216d81e682851 (patch)
tree 1968602fb768894b848b77ebd6083b5d0bb412c6
parent 931652e69a356924c63a69d073b22cbf3f556358 (diff)
[InstCombine] try to convert vector insert+extract to trunc; 2nd try
This was originally committed at rL343407, but reverted at rL343458 because it crashed trying to handle a case where the destination type is FP. This version of the patch adds a check for that possibility. Tests added at rL343480. Original commit message: This transform is requested for the backend in: https://bugs.llvm.org/show_bug.cgi?id=39016 ...but I figured it was worth doing in IR too, and it's probably easier to implement here, so that's this patch. In the simplest case, we are just truncating a scalar value. If the extract index doesn't correspond to the LSBs of the scalar, then we have to shift-right before the truncate. Endian-ness makes this tricky, but hopefully the ASCII-art helps visualize the transform. Differential Revision: https://reviews.llvm.org/D52439
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp 48
-rw-r--r-- llvm/test/Transforms/InstCombine/extractelement.ll 86
2 files changed, 101 insertions, 33 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index c391034dc00..945664de686 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -167,7 +167,8 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
}
static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
- InstCombiner::BuilderTy &Builder) {
+ InstCombiner::BuilderTy &Builder,
+ bool IsBigEndian) {
Value *X;
uint64_t ExtIndexC;
if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
@@ -186,6 +187,49 @@ static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
if (Value *Elt = findScalarElement(X, ExtIndexC))
return new BitCastInst(Elt, DestTy);
+ // If the source elements are wider than the destination, try to shift and
+ // truncate a subset of scalar bits of an insert op.
+ // TODO: This is limited to integer types, but we could bitcast to/from FP.
+ if (NumSrcElts < NumElts && SrcTy->getScalarType()->isIntegerTy() &&
+ DestTy->getScalarType()->isIntegerTy()) {
+ Value *Scalar;
+ uint64_t InsIndexC;
+ if (!match(X, m_InsertElement(m_Value(), m_Value(Scalar),
+ m_ConstantInt(InsIndexC))))
+ return nullptr;
+
+ // The extract must be from the subset of vector elements that we inserted
+ // into. Example: if we inserted element 1 of a <2 x i64> and we are
+ // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
+ // of elements 4-7 of the bitcasted vector.
+ unsigned NarrowingRatio = NumElts / NumSrcElts;
+ if (ExtIndexC / NarrowingRatio != InsIndexC)
+ return nullptr;
+
+ // We are extracting part of the original scalar. How that scalar is
+ // inserted into the vector depends on the endian-ness. Example:
+ // Vector Byte Elt Index: 0 1 2 3 4 5 6 7
+ // +--+--+--+--+--+--+--+--+
+ // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3|
+ // extelt <4 x i16> V', 3: | |S2|S3|
+ // +--+--+--+--+--+--+--+--+
+ // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
+ // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
+ // In this example, we must right-shift little-endian. Big-endian is just a
+ // truncate.
+ unsigned Chunk = ExtIndexC % NarrowingRatio;
+ if (IsBigEndian)
+ Chunk = NarrowingRatio - 1 - Chunk;
+ unsigned ShAmt = Chunk * DestTy->getPrimitiveSizeInBits();
+ if (ShAmt) {
+ // Bail out if we could end with more instructions than we started with.
+ if (!Ext.getVectorOperand()->hasOneUse())
+ return nullptr;
+ Scalar = Builder.CreateLShr(Scalar, ShAmt);
+ }
+ return new TruncInst(Scalar, DestTy);
+ }
+
return nullptr;
}
@@ -224,7 +268,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
}
}
- if (Instruction *I = foldBitcastExtElt(EI, Builder))
+ if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian()))
return I;
// If there's a vector PHI feeding a scalar use through this extractelement
diff --git a/llvm/test/Transforms/InstCombine/extractelement.ll b/llvm/test/Transforms/InstCombine/extractelement.ll
index f29c020b2d2..65ef373dd48 100644
--- a/llvm/test/Transforms/InstCombine/extractelement.ll
+++ b/llvm/test/Transforms/InstCombine/extractelement.ll
@@ -42,11 +42,14 @@ define i64 @test2(i64 %in) {
}
define i32 @bitcasted_inselt_wide_source_zero_elt(i64 %x) {
-; ANY-LABEL: @bitcasted_inselt_wide_source_zero_elt(
-; ANY-NEXT: [[I:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
-; ANY-NEXT: [[B:%.*]] = bitcast <2 x i64> [[I]] to <4 x i32>
-; ANY-NEXT: [[R:%.*]] = extractelement <4 x i32> [[B]], i32 0
-; ANY-NEXT: ret i32 [[R]]
+; LE-LABEL: @bitcasted_inselt_wide_source_zero_elt(
+; LE-NEXT: [[R:%.*]] = trunc i64 [[X:%.*]] to i32
+; LE-NEXT: ret i32 [[R]]
+;
+; BE-LABEL: @bitcasted_inselt_wide_source_zero_elt(
+; BE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 32
+; BE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i32
+; BE-NEXT: ret i32 [[R]]
;
%i = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
%b = bitcast <2 x i64> %i to <4 x i32>
@@ -55,11 +58,14 @@ define i32 @bitcasted_inselt_wide_source_zero_elt(i64 %x) {
}
define i16 @bitcasted_inselt_wide_source_modulo_elt(i64 %x) {
-; ANY-LABEL: @bitcasted_inselt_wide_source_modulo_elt(
-; ANY-NEXT: [[I:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1
-; ANY-NEXT: [[B:%.*]] = bitcast <2 x i64> [[I]] to <8 x i16>
-; ANY-NEXT: [[R:%.*]] = extractelement <8 x i16> [[B]], i32 4
-; ANY-NEXT: ret i16 [[R]]
+; LE-LABEL: @bitcasted_inselt_wide_source_modulo_elt(
+; LE-NEXT: [[R:%.*]] = trunc i64 [[X:%.*]] to i16
+; LE-NEXT: ret i16 [[R]]
+;
+; BE-LABEL: @bitcasted_inselt_wide_source_modulo_elt(
+; BE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 48
+; BE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i16
+; BE-NEXT: ret i16 [[R]]
;
%i = insertelement <2 x i64> undef, i64 %x, i32 1
%b = bitcast <2 x i64> %i to <8 x i16>
@@ -68,11 +74,14 @@ define i16 @bitcasted_inselt_wide_source_modulo_elt(i64 %x) {
}
define i32 @bitcasted_inselt_wide_source_not_modulo_elt(i64 %x) {
-; ANY-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt(
-; ANY-NEXT: [[I:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
-; ANY-NEXT: [[B:%.*]] = bitcast <2 x i64> [[I]] to <4 x i32>
-; ANY-NEXT: [[R:%.*]] = extractelement <4 x i32> [[B]], i32 1
-; ANY-NEXT: ret i32 [[R]]
+; LE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt(
+; LE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 32
+; LE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i32
+; LE-NEXT: ret i32 [[R]]
+;
+; BE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt(
+; BE-NEXT: [[R:%.*]] = trunc i64 [[X:%.*]] to i32
+; BE-NEXT: ret i32 [[R]]
;
%i = insertelement <2 x i64> undef, i64 %x, i32 0
%b = bitcast <2 x i64> %i to <4 x i32>
@@ -81,11 +90,15 @@ define i32 @bitcasted_inselt_wide_source_not_modulo_elt(i64 %x) {
}
define i8 @bitcasted_inselt_wide_source_not_modulo_elt_not_half(i32 %x) {
-; ANY-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt_not_half(
-; ANY-NEXT: [[I:%.*]] = insertelement <2 x i32> undef, i32 [[X:%.*]], i32 0
-; ANY-NEXT: [[B:%.*]] = bitcast <2 x i32> [[I]] to <8 x i8>
-; ANY-NEXT: [[R:%.*]] = extractelement <8 x i8> [[B]], i32 2
-; ANY-NEXT: ret i8 [[R]]
+; LE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt_not_half(
+; LE-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 16
+; LE-NEXT: [[R:%.*]] = trunc i32 [[TMP1]] to i8
+; LE-NEXT: ret i8 [[R]]
+;
+; BE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt_not_half(
+; BE-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 8
+; BE-NEXT: [[R:%.*]] = trunc i32 [[TMP1]] to i8
+; BE-NEXT: ret i8 [[R]]
;
%i = insertelement <2 x i32> undef, i32 %x, i32 0
%b = bitcast <2 x i32> %i to <8 x i8>
@@ -94,11 +107,15 @@ define i8 @bitcasted_inselt_wide_source_not_modulo_elt_not_half(i32 %x) {
}
define i3 @bitcasted_inselt_wide_source_not_modulo_elt_not_half_weird_types(i15 %x) {
-; ANY-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt_not_half_weird_types(
-; ANY-NEXT: [[I:%.*]] = insertelement <3 x i15> undef, i15 [[X:%.*]], i32 0
-; ANY-NEXT: [[B:%.*]] = bitcast <3 x i15> [[I]] to <15 x i3>
-; ANY-NEXT: [[R:%.*]] = extractelement <15 x i3> [[B]], i32 1
-; ANY-NEXT: ret i3 [[R]]
+; LE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt_not_half_weird_types(
+; LE-NEXT: [[TMP1:%.*]] = lshr i15 [[X:%.*]], 3
+; LE-NEXT: [[R:%.*]] = trunc i15 [[TMP1]] to i3
+; LE-NEXT: ret i3 [[R]]
+;
+; BE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt_not_half_weird_types(
+; BE-NEXT: [[TMP1:%.*]] = lshr i15 [[X:%.*]], 9
+; BE-NEXT: [[R:%.*]] = trunc i15 [[TMP1]] to i3
+; BE-NEXT: ret i3 [[R]]
;
%i = insertelement <3 x i15> undef, i15 %x, i32 0
%b = bitcast <3 x i15> %i to <15 x i3>
@@ -125,12 +142,19 @@ define i8 @bitcasted_inselt_wide_source_wrong_insert(<2 x i32> %v, i32 %x) {
declare void @use(<8 x i8>)
define i8 @bitcasted_inselt_wide_source_uses(i32 %x) {
-; ANY-LABEL: @bitcasted_inselt_wide_source_uses(
-; ANY-NEXT: [[I:%.*]] = insertelement <2 x i32> undef, i32 [[X:%.*]], i32 0
-; ANY-NEXT: [[B:%.*]] = bitcast <2 x i32> [[I]] to <8 x i8>
-; ANY-NEXT: call void @use(<8 x i8> [[B]])
-; ANY-NEXT: [[R:%.*]] = extractelement <8 x i8> [[B]], i32 3
-; ANY-NEXT: ret i8 [[R]]
+; LE-LABEL: @bitcasted_inselt_wide_source_uses(
+; LE-NEXT: [[I:%.*]] = insertelement <2 x i32> undef, i32 [[X:%.*]], i32 0
+; LE-NEXT: [[B:%.*]] = bitcast <2 x i32> [[I]] to <8 x i8>
+; LE-NEXT: call void @use(<8 x i8> [[B]])
+; LE-NEXT: [[R:%.*]] = extractelement <8 x i8> [[B]], i32 3
+; LE-NEXT: ret i8 [[R]]
+;
+; BE-LABEL: @bitcasted_inselt_wide_source_uses(
+; BE-NEXT: [[I:%.*]] = insertelement <2 x i32> undef, i32 [[X:%.*]], i32 0
+; BE-NEXT: [[B:%.*]] = bitcast <2 x i32> [[I]] to <8 x i8>
+; BE-NEXT: call void @use(<8 x i8> [[B]])
+; BE-NEXT: [[R:%.*]] = trunc i32 [[X]] to i8
+; BE-NEXT: ret i8 [[R]]
;
%i = insertelement <2 x i32> undef, i32 %x, i32 0
%b = bitcast <2 x i32> %i to <8 x i8>