summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGeorge Kyriazis <george.kyriazis@intel.com>2018-04-05 17:51:02 -0500
committerGeorge Kyriazis <george.kyriazis@intel.com>2018-04-18 10:51:38 -0500
commit670a99c233541b21420151f3c664dad712287457 (patch)
tree1c432582e7260ca3978cdd0257941ba72450d560 /src
parentaa482014e542de21785b2a4386ebbfc4a8e8df1d (diff)
swr/rast: add cvt instructions in x86 lowering pass
Support generic VCVTPD2PS and VCVTPH2PS in x86 lowering pass. Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py70
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp14
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h3
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp6
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp14
5 files changed, 48 insertions, 59 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
index 2636e60ae9..4a7d2e9a54 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
@@ -42,28 +42,26 @@ inst_aliases = {
}
intrinsics = [
- ['VGATHERPD', 'x86_avx2_gather_d_pd_256', ['src', 'pBase', 'indices', 'mask', 'scale'], 'mSimd4FP64Ty'],
- ['VGATHERPS', 'x86_avx2_gather_d_ps_256', ['src', 'pBase', 'indices', 'mask', 'scale'], 'mSimdFP32Ty'],
- ['VGATHERPS_16', 'x86_avx512_gather_dps_512', ['src', 'pBase', 'indices', 'mask', 'scale'], 'mSimd16FP32Ty'],
- ['VGATHERDD', 'x86_avx2_gather_d_d_256', ['src', 'pBase', 'indices', 'mask', 'scale'], 'mSimdInt32Ty'],
- ['VGATHERDD_16', 'x86_avx512_gather_dpi_512', ['src', 'pBase', 'indices', 'mask', 'scale'], 'mSimd16Int32Ty'],
- ['VRCPPS', 'x86_avx_rcp_ps_256', ['a'], 'mSimdFP32Ty'],
- ['VROUND', 'x86_avx_round_ps_256', ['a', 'rounding'], 'mSimdFP32Ty'],
- ['BEXTR_32', 'x86_bmi_bextr_32', ['src', 'control'], 'mInt32Ty'],
- ['VPSHUFB', 'x86_avx2_pshuf_b', ['a', 'b'], 'mSimd32Int8Ty'],
- ['VPERMD', 'x86_avx2_permd', ['a', 'idx'], 'mSimdInt32Ty'],
- ['VPERMPS', 'x86_avx2_permps', ['idx', 'a'], 'mSimdFP32Ty'],
- ['VCVTPD2PS', 'x86_avx_cvt_pd2_ps_256', ['a'], 'mSimdFP32Ty'],
- ['VCVTPH2PS', 'x86_vcvtph2ps_256', ['a'], 'mSimdFP32Ty'],
- ['VCVTPS2PH', 'x86_vcvtps2ph_256', ['a', 'round'], 'mSimdFP16Ty'],
- ['VHSUBPS', 'x86_avx_hsub_ps_256', ['a', 'b'], 'mSimdFP32Ty'],
- ['VPTESTC', 'x86_avx_ptestc_256', ['a', 'b'], 'mInt32Ty'],
- ['VPTESTZ', 'x86_avx_ptestz_256', ['a', 'b'], 'mInt32Ty'],
- ['VFMADDPS', 'x86_fma_vfmadd_ps_256', ['a', 'b', 'c'], 'mSimdFP32Ty'],
- ['VMOVMSKPS', 'x86_avx_movmsk_ps_256', ['a'], 'mInt32Ty'],
- ['VPHADDD', 'x86_avx2_phadd_d', ['a', 'b'], 'mSimdInt32Ty'],
- ['PDEP32', 'x86_bmi_pdep_32', ['a', 'b'], 'mInt32Ty'],
- ['RDTSC', 'x86_rdtsc', [], 'mInt64Ty'],
+ ['VGATHERPD', ['src', 'pBase', 'indices', 'mask', 'scale'], 'src'],
+ ['VGATHERPS', ['src', 'pBase', 'indices', 'mask', 'scale'], 'src'],
+ ['VGATHERDD', ['src', 'pBase', 'indices', 'mask', 'scale'], 'src'],
+ ['VRCPPS', ['a'], 'a'],
+ ['VROUND', ['a', 'rounding'], 'a'],
+ ['BEXTR_32', ['src', 'control'], 'src'],
+ ['VPSHUFB', ['a', 'b'], 'a'],
+ ['VPERMD', ['a', 'idx'], 'a'],
+ ['VPERMPS', ['idx', 'a'], 'a'],
+ ['VCVTPD2PS', ['a'], 'VectorType::get(mFP32Ty, a->getType()->getVectorNumElements())'],
+ ['VCVTPH2PS', ['a'], 'VectorType::get(mFP32Ty, a->getType()->getVectorNumElements())'],
+ ['VCVTPS2PH', ['a', 'round'], 'mSimdFP16Ty'],
+ ['VHSUBPS', ['a', 'b'], 'a'],
+ ['VPTESTC', ['a', 'b'], 'mInt32Ty'],
+ ['VPTESTZ', ['a', 'b'], 'mInt32Ty'],
+ ['VFMADDPS', ['a', 'b', 'c'], 'a'],
+ ['VMOVMSKPS', ['a'], 'mInt32Ty'],
+ ['VPHADDD', ['a', 'b'], 'a'],
+ ['PDEP32', ['a', 'b'], 'a'],
+ ['RDTSC', [], 'mInt64Ty'],
]
llvm_intrinsics = [
@@ -231,19 +229,31 @@ def generate_meta_h(output_dir):
functions = []
for inst in intrinsics:
+ name = inst[0]
+ args = inst[1]
+ ret = inst[2]
+
#print('Inst: %s, x86: %s numArgs: %d' % (inst[0], inst[1], len(inst[2])))
- if len(inst[2]) != 0:
- declargs = 'Value* ' + ', Value* '.join(inst[2])
- decl = 'Value* %s(%s, const llvm::Twine& name = "")' % (inst[0], declargs)
+ if len(args) != 0:
+ declargs = 'Value* ' + ', Value* '.join(args)
+ decl = 'Value* %s(%s, const llvm::Twine& name = "")' % (name, declargs)
else:
- decl = 'Value* %s(const llvm::Twine& name = "")' % (inst[0])
+ decl = 'Value* %s(const llvm::Twine& name = "")' % (name)
+
+ # determine the return type of the intrinsic. It can either be:
+ # - type of one of the input arguments
+ # - snippet of code to set the return type
+
+ if ret in args:
+ returnTy = ret + '->getType()'
+ else:
+ returnTy = ret
functions.append({
'decl' : decl,
- 'name' : inst[0],
- 'intrin' : inst[1],
- 'args' : inst[2],
- 'returnType': inst[3]
+ 'name' : name,
+ 'args' : args,
+ 'returnType': returnTy
})
MakoTemplateWriter.to_file(
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
index eccf0ad0d6..c7912785b7 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
@@ -176,13 +176,6 @@ namespace SwrJit
return VGATHERPS(vSrc, pBase, vIndices, vMask, C(scale));
}
- Value *Builder::GATHERPS_16(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale, JIT_MEM_CLIENT usage)
- {
- AssertMemoryUsageParams(pBase, usage);
-
- return VGATHERPS_16(vSrc, pBase, vIndices, vMask, C(scale));
- }
-
//////////////////////////////////////////////////////////////////////////
/// @brief Generate a masked gather operation in LLVM IR. If not
/// supported on the underlying platform, emulate it with loads
@@ -198,13 +191,6 @@ namespace SwrJit
return VGATHERDD(vSrc, pBase, vIndices, vMask, C(scale));
}
- Value *Builder::GATHERDD_16(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale, JIT_MEM_CLIENT usage)
- {
- AssertMemoryUsageParams(pBase, usage);
-
- return VGATHERDD_16(vSrc, pBase, vIndices, vMask, C(scale));
- }
-
//////////////////////////////////////////////////////////////////////////
/// @brief Generate a masked gather operation in LLVM IR. If not
/// supported on the underlying platform, emulate it with loads
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
index f229da38a9..9ccac4f800 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
@@ -78,13 +78,10 @@ void Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets,
virtual Value *GATHERPS(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-Value *GATHERPS_16(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-
void GATHER4PS(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
Value* mask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
virtual Value *GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-Value *GATHERDD_16(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
void GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
Value* mask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
index 0fbfd211ef..8d659d08e0 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -1563,10 +1563,8 @@ void FetchJit::Shuffle16bpcGather16(Shuffle16bpcArgs &args)
if (bFP)
{
// extract 128 bit lanes to sign extend each component
- /// @todo Force 8-wide cvt until we support generic cvt in x86 lowering pass
- Function* pCvtPh2Ps = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::x86_vcvtph2ps_256);
- Value *temp_lo = CALL(pCvtPh2Ps, BITCAST(VEXTRACT(selectedPermute_lo, C(lane)), v8x16Ty));
- Value *temp_hi = CALL(pCvtPh2Ps, BITCAST(VEXTRACT(selectedPermute_hi, C(lane)), v8x16Ty));
+ Value *temp_lo = CVTPH2PS(BITCAST(VEXTRACT(selectedPermute_lo, C(lane)), v8x16Ty));
+ Value *temp_hi = CVTPH2PS(BITCAST(VEXTRACT(selectedPermute_hi, C(lane)), v8x16Ty));
vVertexElements[currentVertexElement] = JOIN_16(temp_lo, temp_hi);
}
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
index 11a2397c43..b27335f060 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
@@ -76,8 +76,6 @@ namespace SwrJit
{"meta.intrinsic.VROUND", Intrinsic::x86_avx_round_ps_256},
{"meta.intrinsic.BEXTR_32", Intrinsic::x86_bmi_bextr_32},
{"meta.intrinsic.VPSHUFB", Intrinsic::x86_avx2_pshuf_b},
- {"meta.intrinsic.VCVTPD2PS", Intrinsic::x86_avx_cvt_pd2_ps_256},
- {"meta.intrinsic.VCVTPH2PS", Intrinsic::x86_vcvtph2ps_256},
{"meta.intrinsic.VCVTPS2PH", Intrinsic::x86_vcvtps2ph_256},
{"meta.intrinsic.VHSUBPS", Intrinsic::x86_avx_hsub_ps_256},
{"meta.intrinsic.VPTESTC", Intrinsic::x86_avx_ptestc_256},
@@ -101,27 +99,27 @@ namespace SwrJit
{"meta.intrinsic.VPERMPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
{"meta.intrinsic.VPERMD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
{"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERPS_16",{{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
{"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERDD_16",{{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
+ {"meta.intrinsic.VCVTPH2PS", {{Intrinsic::x86_vcvtph2ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
},
{ // AVX2
{"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
{"meta.intrinsic.VPERMPS", {{Intrinsic::x86_avx2_permps, Intrinsic::not_intrinsic}, VPERM_EMU}},
{"meta.intrinsic.VPERMD", {{Intrinsic::x86_avx2_permd, Intrinsic::not_intrinsic}, VPERM_EMU}},
{"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERPS_16",{{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
{"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERDD_16",{{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
+ {"meta.intrinsic.VCVTPH2PS", {{Intrinsic::x86_vcvtph2ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
},
{ // AVX512
{"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx512_rcp14_ps_256, Intrinsic::x86_avx512_rcp14_ps_512}, NO_EMU}},
{"meta.intrinsic.VPERMPS", {{Intrinsic::x86_avx512_mask_permvar_sf_256, Intrinsic::x86_avx512_mask_permvar_sf_512}, NO_EMU}},
{"meta.intrinsic.VPERMD", {{Intrinsic::x86_avx512_mask_permvar_si_256, Intrinsic::x86_avx512_mask_permvar_si_512}, NO_EMU}},
{"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERPS_16",{{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
{"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERDD_16",{{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx512_mask_cvtpd2ps_256, Intrinsic::x86_avx512_mask_cvtpd2ps_512 }, NO_EMU}},
+ {"meta.intrinsic.VCVTPH2PS", {{Intrinsic::x86_avx512_mask_vcvtph2ps_256, Intrinsic::x86_avx512_mask_vcvtph2ps_512 }, NO_EMU}},
}
};