summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author George Kyriazis <george.kyriazis@intel.com> 2018-04-10 12:03:41 -0500
committer George Kyriazis <george.kyriazis@intel.com> 2018-04-18 10:51:38 -0500
commit 99fe90722d5f613482ca388c0d0aca4b4a5d21d0 (patch)
tree cfbad2a97a726c0b275209c57d280930e7120a0e
parent 0899122c03f06eba89889090b1fb1ab1d4d3ddff (diff)
swr/rast: Replace x86 VMOVMSK with llvm-only implementation
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
-rw-r--r-- src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py | 1
-rw-r--r-- src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp | 2
-rw-r--r-- src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp | 25
-rw-r--r-- src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h | 2
-rw-r--r-- src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp | 1
5 files changed, 26 insertions, 5 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
index 9c1e9e0ac8..bced657644 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
@@ -58,7 +58,6 @@ intrinsics = [
['VPTESTC', ['a', 'b'], 'mInt32Ty'],
['VPTESTZ', ['a', 'b'], 'mInt32Ty'],
['VFMADDPS', ['a', 'b', 'c'], 'a'],
- ['VMOVMSKPS', ['a'], 'mInt32Ty'],
['VPHADDD', ['a', 'b'], 'a'],
['PDEP32', ['a', 'b'], 'a'],
['RDTSC', [], 'mInt64Ty'],
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
index f0cd4413d3..5b70b29afb 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
@@ -608,7 +608,7 @@ namespace SwrJit
pSrcArrayPtr = POINTER_CAST(pSrcArrayPtr, PointerType::get(pSrcTy, 0));
pOffsetsArrayPtr = POINTER_CAST(pOffsetsArrayPtr, PointerType::get(mInt32Ty, 0));
- Value* pMask = VMOVMSKPS(BITCAST(vMask, mSimdFP32Ty));
+ Value* pMask = VMOVMSK(vMask);
// Setup loop basic block
BasicBlock* pLoop = BasicBlock::Create(mpJitMgr->mContext, "Scatter_Loop", pFunc);
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
index aa9e2dddee..f8936930b7 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
@@ -525,6 +525,28 @@ namespace SwrJit
return S_EXT(mask, mSimd16Int32Ty);
}
+ /// @brief Convert <Nxi1> llvm mask to integer
+ /// @param mask - vector whose element type is i1; only 8 and 16 lanes are handled
+ /// @return the mask bitcast to an i8 / i16 and then zero-extended to i32
+ Value *Builder::VMOVMSK(Value* mask)
+ {
+ SWR_ASSERT(mask->getType()->getVectorElementType() == mInt1Ty);
+ uint32_t numLanes = mask->getType()->getVectorNumElements();
+ // Holds the mask as a narrow integer (i8 or i16); widened to i32 on return.
+ Value* packedMask;
+ if (numLanes == 8)
+ {
+ packedMask = BITCAST(mask, mInt8Ty);
+ }
+ else if (numLanes == 16)
+ {
+ packedMask = BITCAST(mask, mInt16Ty);
+ }
+ else
+ {
+ // A bare string literal is a non-null pointer and therefore always true,
+ // so the condition has to be an explicit false for this assert to fire.
+ SWR_ASSERT(false, "Unsupported vector width");
+ // Fallback kept from the original code so the variable is always set.
+ packedMask = BITCAST(mask, mInt8Ty);
+ }
+ return Z_EXT(packedMask, mInt32Ty);
+ }
+
//////////////////////////////////////////////////////////////////////////
/// @brief Generate a VPSHUFB operation in LLVM IR. If not
/// supported on the underlying platform, emulate it
@@ -768,8 +790,7 @@ namespace SwrJit
/// @brief pop count on vector mask (e.g. <8 x i1>)
Value* Builder::VPOPCNT(Value* a)
{
- Value* b = BITCAST(VMASK(a), mSimdFP32Ty);
- return POPCNT(VMOVMSKPS(b));
+ return POPCNT(VMOVMSK(a));
}
//////////////////////////////////////////////////////////////////////////
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
index 7308821c89..bd4be9ffe2 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
@@ -102,6 +102,8 @@ Value *MASK_16(Value *vmask);
Value *VMASK(Value *mask);
Value *VMASK_16(Value *mask);
+Value *VMOVMSK(Value *mask);
+
//////////////////////////////////////////////////////////////////////////
/// @brief functions that build IR to call x86 intrinsics directly, or
/// emulate them with other instructions if not available on the host
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
index 7cfa772498..856d67d2bc 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
@@ -79,7 +79,6 @@ namespace SwrJit
{"meta.intrinsic.VPTESTC", Intrinsic::x86_avx_ptestc_256},
{"meta.intrinsic.VPTESTZ", Intrinsic::x86_avx_ptestz_256},
{"meta.intrinsic.VFMADDPS", Intrinsic::x86_fma_vfmadd_ps_256},
- {"meta.intrinsic.VMOVMSKPS", Intrinsic::x86_avx_movmsk_ps_256},
{"meta.intrinsic.VPHADDD", Intrinsic::x86_avx2_phadd_d},
{"meta.intrinsic.PDEP32", Intrinsic::x86_bmi_pdep_32},
{"meta.intrinsic.RDTSC", Intrinsic::x86_rdtsc},