//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file describes the X86 AVX512 instruction set, defining the // instructions, and properties of the instructions which are needed for code // generation, machine code emission, and analysis. // //===----------------------------------------------------------------------===// // Group template arguments that can be derived from the vector type (EltNum x // EltVT). These are things like the register class for the writemask, etc. // The idea is to pass one of these as the template argument rather than the // individual arguments. // The template is also used for scalar types, in this case numelts is 1. class X86VectorVTInfo { RegisterClass RC = rc; ValueType EltVT = eltvt; int NumElts = numelts; // Corresponding mask register class. RegisterClass KRC = !cast("VK" # NumElts); // Corresponding write-mask register class. RegisterClass KRCWM = !cast("VK" # NumElts # "WM"); // The mask VT. ValueType KVT = !cast("v" # NumElts # "i1"); // Suffix used in the instruction mnemonic. string Suffix = suffix; // VTName is a string name for vector VT. For vector types it will be // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32 // It is a little bit complex for scalar types, where NumElts = 1. // In this case we build v4f32 or v2f64 string VTName = "v" # !if (!eq (NumElts, 1), !if (!eq (EltVT.Size, 32), 4, !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT; // The vector VT. ValueType VT = !cast(VTName); string EltTypeName = !cast(EltVT); // Size of the element type in bits, e.g. 32 for v16i32. string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName)); int EltSize = EltVT.Size; // "i" for integer types and "f" for floating-point types string TypeVariantName = !subst(EltSizeName, "", EltTypeName); // Size of RC in bits, e.g. 512 for VR512. int Size = VT.Size; // The corresponding memory operand, e.g. i512mem for VR512. X86MemOperand MemOp = !cast(TypeVariantName # Size # "mem"); X86MemOperand ScalarMemOp = !cast(EltVT # "mem"); // FP scalar memory operand for intrinsics - ssmem/sdmem. Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast("ssmem"), !if (!eq (EltTypeName, "f64"), !cast("sdmem"), ?)); // Load patterns // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64 // due to load promotion during legalization PatFrag LdFrag = !cast("load" # !if (!eq (TypeVariantName, "i"), !if (!eq (Size, 128), "v2i64", !if (!eq (Size, 256), "v4i64", !if (!eq (Size, 512), "v8i64", VTName))), VTName)); PatFrag AlignedLdFrag = !cast("alignedload" # !if (!eq (TypeVariantName, "i"), !if (!eq (Size, 128), "v2i64", !if (!eq (Size, 256), "v4i64", !if (!eq (Size, 512), "v8i64", VTName))), VTName)); PatFrag ScalarLdFrag = !cast("load" # EltVT); ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"), !cast("sse_load_f32"), !if (!eq (EltTypeName, "f64"), !cast("sse_load_f64"), ?)); // The corresponding float type, e.g. v16f32 for v16i32 // Note: For EltSize < 32, FloatVT is illegal and TableGen // fails to compile, so we choose FloatVT = VT ValueType FloatVT = !cast( !if (!eq (!srl(EltSize,5),0), VTName, !if (!eq(TypeVariantName, "i"), "v" # NumElts # "f" # EltSize, VTName))); ValueType IntVT = !cast( !if (!eq (!srl(EltSize,5),0), VTName, !if (!eq(TypeVariantName, "f"), "v" # NumElts # "i" # EltSize, VTName))); // The string to specify embedded broadcast in assembly. string BroadcastStr = "{1to" # NumElts # "}"; // 8-bit compressed displacement tuple/subvector format. This is only // defined for NumElts <= 8. CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0), !cast("CD8VT" # NumElts), ?); SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm, !if (!eq (Size, 256), sub_ymm, ?)); Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle, !if (!eq (EltTypeName, "f64"), SSEPackedDouble, SSEPackedInt)); RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X); // A vector tye of the same width with element type i64. This is used to // create patterns for logic ops. ValueType i64VT = !cast("v" # !srl(Size, 6) # "i64"); // A vector type of the same width with element type i32. This is used to // create the canonical constant zero node ImmAllZerosV. ValueType i32VT = !cast("v" # !srl(Size, 5) # "i32"); dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV))); string ZSuffix = !if (!eq (Size, 128), "Z128", !if (!eq (Size, 256), "Z256", "Z")); } def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">; def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">; def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">; def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">; def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">; def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">; // "x" in v32i8x_info means RC = VR256X def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">; def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">; def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">; def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">; def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">; def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">; def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">; def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">; def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">; def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">; def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">; def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">; // We map scalar types to the smallest (128-bit) vector type // with the appropriate element type. This allows to use the same masking logic. def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">; def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">; def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">; def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">; class AVX512VLVectorVTInfo { X86VectorVTInfo info512 = i512; X86VectorVTInfo info256 = i256; X86VectorVTInfo info128 = i128; } def avx512vl_i8_info : AVX512VLVectorVTInfo; def avx512vl_i16_info : AVX512VLVectorVTInfo; def avx512vl_i32_info : AVX512VLVectorVTInfo; def avx512vl_i64_info : AVX512VLVectorVTInfo; def avx512vl_f32_info : AVX512VLVectorVTInfo; def avx512vl_f64_info : AVX512VLVectorVTInfo; class X86KVectorVTInfo { RegisterClass KRC = _krc; RegisterClass KRCWM = _krcwm; ValueType KVT = _vt; } def v1i1_info : X86KVectorVTInfo; def v2i1_info : X86KVectorVTInfo; def v4i1_info : X86KVectorVTInfo; def v8i1_info : X86KVectorVTInfo; def v16i1_info : X86KVectorVTInfo; def v32i1_info : X86KVectorVTInfo; def v64i1_info : X86KVectorVTInfo; // This multiclass generates the masking variants from the non-masking // variant. It only provides the assembly pieces for the masking variants. // It assumes custom ISel patterns for masking which can be provided as // template arguments. multiclass AVX512_maskable_custom O, Format F, dag Outs, dag Ins, dag MaskingIns, dag ZeroMaskingIns, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, list Pattern, list MaskingPattern, list ZeroMaskingPattern, string MaskingConstraint = "", bit IsCommutable = 0, bit IsKCommutable = 0> { let isCommutable = IsCommutable in def NAME: AVX512; // Prefer over VMOV*rrk Pat<> let isCommutable = IsKCommutable in def NAME#k: AVX512, EVEX_K { // In case of the 3src subclass this is overridden with a let. string Constraints = MaskingConstraint; } // Zero mask does not add any restrictions to commute operands transformation. // So, it is Ok to use IsCommutable instead of IsKCommutable. let isCommutable = IsCommutable in // Prefer over VMOV*rrkz Pat<> def NAME#kz: AVX512, EVEX_KZ; } // Common base class of AVX512_maskable and AVX512_maskable_3src. multiclass AVX512_maskable_common O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, dag MaskingIns, dag ZeroMaskingIns, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, dag RHS, dag MaskingRHS, SDNode Select = vselect, string MaskingConstraint = "", bit IsCommutable = 0, bit IsKCommutable = 0> : AVX512_maskable_custom; // This multiclass generates the unconditional/non-masking, the masking and // the zero-masking variant of the vector instruction. In the masking case, the // perserved vector elements come from a new dummy input operand tied to $dst. // This version uses a separate dag for non-masking and masking. multiclass AVX512_maskable_split O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, dag RHS, dag MaskRHS, bit IsCommutable = 0, bit IsKCommutable = 0, SDNode Select = vselect> : AVX512_maskable_custom; // This multiclass generates the unconditional/non-masking, the masking and // the zero-masking variant of the vector instruction. In the masking case, the // perserved vector elements come from a new dummy input operand tied to $dst. multiclass AVX512_maskable O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, dag RHS, bit IsCommutable = 0, bit IsKCommutable = 0, SDNode Select = vselect> : AVX512_maskable_common; // This multiclass generates the unconditional/non-masking, the masking and // the zero-masking variant of the scalar instruction. multiclass AVX512_maskable_scalar O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, dag RHS, bit IsCommutable = 0> : AVX512_maskable; // Similar to AVX512_maskable but in this case one of the source operands // ($src1) is already tied to $dst so we just use that for the preserved // vector elements. NOTE that the NonTiedIns (the ins dag) should exclude // $src1. multiclass AVX512_maskable_3src O, Format F, X86VectorVTInfo _, dag Outs, dag NonTiedIns, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, dag RHS, bit IsCommutable = 0, bit IsKCommutable = 0, SDNode Select = vselect, bit MaskOnly = 0> : AVX512_maskable_common; // Similar to AVX512_maskable_3src but in this case the input VT for the tied // operand differs from the output VT. This requires a bitconvert on // the preserved vector going into the vselect. // NOTE: The unmasked pattern is disabled. multiclass AVX512_maskable_3src_cast O, Format F, X86VectorVTInfo OutVT, X86VectorVTInfo InVT, dag Outs, dag NonTiedIns, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, dag RHS, bit IsCommutable = 0> : AVX512_maskable_common; multiclass AVX512_maskable_3src_scalar O, Format F, X86VectorVTInfo _, dag Outs, dag NonTiedIns, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, dag RHS, bit IsCommutable = 0, bit IsKCommutable = 0, bit MaskOnly = 0> : AVX512_maskable_3src; multiclass AVX512_maskable_in_asm O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, list Pattern> : AVX512_maskable_custom; multiclass AVX512_maskable_3src_in_asm O, Format F, X86VectorVTInfo _, dag Outs, dag NonTiedIns, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, list Pattern> : AVX512_maskable_custom; // Instruction with mask that puts result in mask register, // like "compare" and "vptest" multiclass AVX512_maskable_custom_cmp O, Format F, dag Outs, dag Ins, dag MaskingIns, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, list Pattern, list MaskingPattern, bit IsCommutable = 0> { let isCommutable = IsCommutable in def NAME: AVX512; def NAME#k: AVX512, EVEX_K; } multiclass AVX512_maskable_common_cmp O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, dag MaskingIns, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, dag RHS, dag MaskingRHS, bit IsCommutable = 0> : AVX512_maskable_custom_cmp; multiclass AVX512_maskable_cmp O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, dag RHS, bit IsCommutable = 0> : AVX512_maskable_common_cmp; multiclass AVX512_maskable_cmp_alt O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm> : AVX512_maskable_custom_cmp; // This multiclass generates the unconditional/non-masking, the masking and // the zero-masking variant of the vector instruction. In the masking case, the // perserved vector elements come from a new dummy input operand tied to $dst. multiclass AVX512_maskable_logic O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, dag RHS, dag MaskedRHS, bit IsCommutable = 0, SDNode Select = vselect> : AVX512_maskable_custom; // Alias instruction that maps zero vector to pxor / xorp* for AVX-512. // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then // swizzled by ExecutionDomainFix to pxor. // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in { def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "", [(set VR512:$dst, (v16i32 immAllZerosV))]>; def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "", [(set VR512:$dst, (v16i32 immAllOnesV))]>; } // Alias instructions that allow VPTERNLOG to be used with a mask to create // a mix of all ones and all zeros elements. This is done this way to force // the same register to be used as input for all three sources. let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in { def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst), (ins VK16WM:$mask), "", [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask), (v16i32 immAllOnesV), (v16i32 immAllZerosV)))]>; def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst), (ins VK8WM:$mask), "", [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask), (bc_v8i64 (v16i32 immAllOnesV)), (bc_v8i64 (v16i32 immAllZerosV))))]>; } let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in { def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "", [(set VR128X:$dst, (v4i32 immAllZerosV))]>; def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "", [(set VR256X:$dst, (v8i32 immAllZerosV))]>; } // Alias instructions that map fld0 to xorps for sse or vxorps for avx. // This is expanded by ExpandPostRAPseudos. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in { def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "", [(set FR32X:$dst, fp32imm0)]>; def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "", [(set FR64X:$dst, fpimm0)]>; } //===----------------------------------------------------------------------===// // AVX-512 - VECTOR INSERT // // Supports two different pattern operators for mask and unmasked ops. Allows // null_frag to be passed for one. multiclass vinsert_for_size_split { let hasSideEffects = 0, ExeDomain = To.ExeDomain in { defm rr : AVX512_maskable_split, AVX512AIi8Base, EVEX_4V, Sched<[sched]>; let mayLoad = 1 in defm rm : AVX512_maskable_split, AVX512AIi8Base, EVEX_4V, EVEX_CD8, Sched<[sched.Folded, ReadAfterLd]>; } } // Passes the same pattern operator for masked and unmasked ops. multiclass vinsert_for_size : vinsert_for_size_split; multiclass vinsert_for_size_lowering p> { let Predicates = p in { def : Pat<(vinsert_insert:$ins (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)), (To.VT (!cast(InstrStr#"rr") To.RC:$src1, From.RC:$src2, (INSERT_get_vinsert_imm To.RC:$ins)))>; def : Pat<(vinsert_insert:$ins (To.VT To.RC:$src1), (From.VT (bitconvert (From.LdFrag addr:$src2))), (iPTR imm)), (To.VT (!cast(InstrStr#"rm") To.RC:$src1, addr:$src2, (INSERT_get_vinsert_imm To.RC:$ins)))>; } } multiclass vinsert_for_type { let Predicates = [HasVLX] in defm NAME # "32x4Z256" : vinsert_for_size, X86VectorVTInfo< 8, EltVT32, VR256X>, vinsert128_insert, sched>, EVEX_V256; defm NAME # "32x4Z" : vinsert_for_size, X86VectorVTInfo<16, EltVT32, VR512>, vinsert128_insert, sched>, EVEX_V512; defm NAME # "64x4Z" : vinsert_for_size, X86VectorVTInfo< 8, EltVT64, VR512>, vinsert256_insert, sched>, VEX_W, EVEX_V512; // Even with DQI we'd like to only use these instructions for masking. let Predicates = [HasVLX, HasDQI] in defm NAME # "64x2Z256" : vinsert_for_size_split, X86VectorVTInfo< 4, EltVT64, VR256X>, null_frag, vinsert128_insert, sched>, VEX_W, EVEX_V256; // Even with DQI we'd like to only use these instructions for masking. let Predicates = [HasDQI] in { defm NAME # "64x2Z" : vinsert_for_size_split, X86VectorVTInfo< 8, EltVT64, VR512>, null_frag, vinsert128_insert, sched>, VEX_W, EVEX_V512; defm NAME # "32x8Z" : vinsert_for_size_split, X86VectorVTInfo<16, EltVT32, VR512>, null_frag, vinsert256_insert, sched>, EVEX_V512; } } // FIXME: Is there a better scheduler class for VINSERTF/VINSERTI? defm VINSERTF : vinsert_for_type; defm VINSERTI : vinsert_for_type; // Codegen pattern with the alternative types, // Even with AVX512DQ we'll still use these for unmasked operations. defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info, vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info, vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; // Codegen pattern with the alternative types insert VEC128 into VEC256 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; // Codegen pattern with the alternative types insert VEC128 into VEC512 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; // Codegen pattern with the alternative types insert VEC256 into VEC512 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info, vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info, vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; multiclass vinsert_for_mask_cast p> { let Predicates = p in { def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask, (bitconvert (vinsert_insert:$ins (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm))), Cast.RC:$src0)), (!cast(InstrStr#"rrk") Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2, (INSERT_get_vinsert_imm To.RC:$ins))>; def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask, (bitconvert (vinsert_insert:$ins (To.VT To.RC:$src1), (From.VT (bitconvert (From.LdFrag addr:$src2))), (iPTR imm))), Cast.RC:$src0)), (!cast(InstrStr#"rmk") Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2, (INSERT_get_vinsert_imm To.RC:$ins))>; def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask, (bitconvert (vinsert_insert:$ins (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm))), Cast.ImmAllZerosV)), (!cast(InstrStr#"rrkz") Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2, (INSERT_get_vinsert_imm To.RC:$ins))>; def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask, (bitconvert (vinsert_insert:$ins (To.VT To.RC:$src1), (From.VT (bitconvert (From.LdFrag addr:$src2))), (iPTR imm))), Cast.ImmAllZerosV)), (!cast(InstrStr#"rmkz") Cast.KRCWM:$mask, To.RC:$src1, addr:$src2, (INSERT_get_vinsert_imm To.RC:$ins))>; } } defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info, v8f32x_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info, v4f64x_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>; defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info, v8i32x_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info, v8i32x_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info, v8i32x_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info, v4i64x_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>; defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info, v4i64x_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>; defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info, v4i64x_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>; defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info, v16f32_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info, v8f64_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasDQI]>; defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info, v16i32_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info, v16i32_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info, v16i32_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info, v8i64_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasDQI]>; defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info, v8i64_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasDQI]>; defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info, v8i64_info, vinsert128_insert, INSERT_get_vinsert128_imm, [HasDQI]>; defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info, v16f32_info, vinsert256_insert, INSERT_get_vinsert256_imm, [HasDQI]>; defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info, v8f64_info, vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info, v16i32_info, vinsert256_insert, INSERT_get_vinsert256_imm, [HasDQI]>; defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info, v16i32_info, vinsert256_insert, INSERT_get_vinsert256_imm, [HasDQI]>; defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info, v16i32_info, vinsert256_insert, INSERT_get_vinsert256_imm, [HasDQI]>; defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info, v8i64_info, vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info, v8i64_info, vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info, v8i64_info, vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; // vinsertps - insert f32 to XMM let ExeDomain = SSEPackedSingle in { def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, u8imm:$src3), "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>; def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), (ins VR128X:$src1, f32mem:$src2, u8imm:$src3), "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR128X:$dst, (X86insertps VR128X:$src1, (v4f32 (scalar_to_vector (loadf32 addr:$src2))), imm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>, Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>; } //===----------------------------------------------------------------------===// // AVX-512 VECTOR EXTRACT //--- // Supports two different pattern operators for mask and unmasked ops. Allows // null_frag to be passed for one. multiclass vextract_for_size_split { let hasSideEffects = 0, ExeDomain = To.ExeDomain in { defm rr : AVX512_maskable_split, AVX512AIi8Base, EVEX, Sched<[SchedRR]>; def mr : AVX512AIi8, EVEX, Sched<[SchedMR]>; let mayStore = 1, hasSideEffects = 0 in def mrk : AVX512AIi8, EVEX_K, EVEX, Sched<[SchedMR]>; } } // Passes the same pattern operator for masked and unmasked ops. multiclass vextract_for_size : vextract_for_size_split; // Codegen pattern for the alternative types multiclass vextract_for_size_lowering p> { let Predicates = p in { def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)), (To.VT (!cast(InstrStr#"rr") From.RC:$src1, (EXTRACT_get_vextract_imm To.RC:$ext)))>; def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm))), addr:$dst), (!cast(InstrStr#"mr") addr:$dst, From.RC:$src1, (EXTRACT_get_vextract_imm To.RC:$ext))>; } } multiclass vextract_for_type { let Predicates = [HasAVX512] in { defm NAME # "32x4Z" : vextract_for_size, X86VectorVTInfo< 4, EltVT32, VR128X>, vextract128_extract, SchedRR, SchedMR>, EVEX_V512, EVEX_CD8<32, CD8VT4>; defm NAME # "64x4Z" : vextract_for_size, X86VectorVTInfo< 4, EltVT64, VR256X>, vextract256_extract, SchedRR, SchedMR>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>; } let Predicates = [HasVLX] in defm NAME # "32x4Z256" : vextract_for_size, X86VectorVTInfo< 4, EltVT32, VR128X>, vextract128_extract, SchedRR, SchedMR>, EVEX_V256, EVEX_CD8<32, CD8VT4>; // Even with DQI we'd like to only use these instructions for masking. let Predicates = [HasVLX, HasDQI] in defm NAME # "64x2Z256" : vextract_for_size_split, X86VectorVTInfo< 2, EltVT64, VR128X>, null_frag, vextract128_extract, SchedRR, SchedMR>, VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>; // Even with DQI we'd like to only use these instructions for masking. let Predicates = [HasDQI] in { defm NAME # "64x2Z" : vextract_for_size_split, X86VectorVTInfo< 2, EltVT64, VR128X>, null_frag, vextract128_extract, SchedRR, SchedMR>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>; defm NAME # "32x8Z" : vextract_for_size_split, X86VectorVTInfo< 8, EltVT32, VR256X>, null_frag, vextract256_extract, SchedRR, SchedMR>, EVEX_V512, EVEX_CD8<32, CD8VT8>; } } // TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types. defm VEXTRACTF : vextract_for_type; defm VEXTRACTI : vextract_for_type; // extract_subvector codegen patterns with the alternative types. // Even with AVX512DQ we'll still use these for unmasked operations. defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info, vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info, vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info, vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info, vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info, vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>; defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info, vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>; // Codegen pattern with the alternative types extract VEC128 from VEC256 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info, vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>; defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info, vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>; // Codegen pattern with the alternative types extract VEC128 from VEC512 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info, vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info, vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; // Codegen pattern with the alternative types extract VEC256 from VEC512 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info, vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info, vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; // A 128-bit extract from bits [255:128] of a 512-bit vector should use a // smaller extract to enable EVEX->VEX. let Predicates = [NoVLX] in { def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))), (v2i64 (VEXTRACTI128rr (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)), (iPTR 1)))>; def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))), (v2f64 (VEXTRACTF128rr (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)), (iPTR 1)))>; def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))), (v4i32 (VEXTRACTI128rr (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)), (iPTR 1)))>; def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))), (v4f32 (VEXTRACTF128rr (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)), (iPTR 1)))>; def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))), (v8i16 (VEXTRACTI128rr (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)), (iPTR 1)))>; def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))), (v16i8 (VEXTRACTI128rr (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)), (iPTR 1)))>; } // A 128-bit extract from bits [255:128] of a 512-bit vector should use a // smaller extract to enable EVEX->VEX. let Predicates = [HasVLX] in { def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))), (v2i64 (VEXTRACTI32x4Z256rr (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)), (iPTR 1)))>; def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))), (v2f64 (VEXTRACTF32x4Z256rr (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)), (iPTR 1)))>; def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))), (v4i32 (VEXTRACTI32x4Z256rr (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)), (iPTR 1)))>; def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))), (v4f32 (VEXTRACTF32x4Z256rr (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)), (iPTR 1)))>; def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))), (v8i16 (VEXTRACTI32x4Z256rr (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)), (iPTR 1)))>; def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))), (v16i8 (VEXTRACTI32x4Z256rr (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)), (iPTR 1)))>; } // Additional patterns for handling a bitcast between the vselect and the // extract_subvector. multiclass vextract_for_mask_cast p> { let Predicates = p in { def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask, (bitconvert (To.VT (vextract_extract:$ext (From.VT From.RC:$src), (iPTR imm)))), To.RC:$src0)), (Cast.VT (!cast(InstrStr#"rrk") Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src, (EXTRACT_get_vextract_imm To.RC:$ext)))>; def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask, (bitconvert (To.VT (vextract_extract:$ext (From.VT From.RC:$src), (iPTR imm)))), Cast.ImmAllZerosV)), (Cast.VT (!cast(InstrStr#"rrkz") Cast.KRCWM:$mask, From.RC:$src, (EXTRACT_get_vextract_imm To.RC:$ext)))>; } } defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info, v4f32x_info, vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>; defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info, v2f64x_info, vextract128_extract, EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>; defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info, v4i32x_info, vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>; defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info, v4i32x_info, vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>; defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info, v4i32x_info, vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>; defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info, v2i64x_info, vextract128_extract, EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>; defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info, v2i64x_info, vextract128_extract, EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>; defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info, v2i64x_info, vextract128_extract, EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>; defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info, v4f32x_info, vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info, v2f64x_info, vextract128_extract, EXTRACT_get_vextract128_imm, [HasDQI]>; defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info, v4i32x_info, vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info, v4i32x_info, vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info, v4i32x_info, vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info, v2i64x_info, vextract128_extract, EXTRACT_get_vextract128_imm, [HasDQI]>; defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info, v2i64x_info, vextract128_extract, EXTRACT_get_vextract128_imm, [HasDQI]>; defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info, v2i64x_info, vextract128_extract, EXTRACT_get_vextract128_imm, [HasDQI]>; defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info, v8f32x_info, vextract256_extract, EXTRACT_get_vextract256_imm, [HasDQI]>; defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info, v4f64x_info, vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info, v8i32x_info, vextract256_extract, EXTRACT_get_vextract256_imm, [HasDQI]>; defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info, v8i32x_info, vextract256_extract, EXTRACT_get_vextract256_imm, [HasDQI]>; defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info, v8i32x_info, vextract256_extract, EXTRACT_get_vextract256_imm, [HasDQI]>; defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info, v4i64x_info, vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info, v4i64x_info, vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info, v4i64x_info, vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; // vextractps - extract 32 bits from XMM def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src1, u8imm:$src2), "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>, EVEX, VEX_WIG, Sched<[WriteVecExtract]>; def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs), (ins f32mem:$dst, VR128X:$src1, u8imm:$src2), "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2), addr:$dst)]>, EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>; //===---------------------------------------------------------------------===// // AVX-512 BROADCAST //--- // broadcast with a scalar argument. multiclass avx512_broadcast_scalar opc, string OpcodeStr, string Name, X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> { def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)), (!cast(Name#DestInfo.ZSuffix#r) (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>; def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask, (X86VBroadcast SrcInfo.FRC:$src), DestInfo.RC:$src0)), (!cast(Name#DestInfo.ZSuffix#rk) DestInfo.RC:$src0, DestInfo.KRCWM:$mask, (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>; def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask, (X86VBroadcast SrcInfo.FRC:$src), DestInfo.ImmAllZerosV)), (!cast(Name#DestInfo.ZSuffix#rkz) DestInfo.KRCWM:$mask, (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>; } // Split version to allow mask and broadcast node to be different types. This // helps support the 32x2 broadcasts. multiclass avx512_broadcast_rm_split opc, string OpcodeStr, string Name, SchedWrite SchedRR, SchedWrite SchedRM, X86VectorVTInfo MaskInfo, X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo, SDPatternOperator UnmaskedOp = X86VBroadcast> { let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in { defm r : AVX512_maskable_split, T8PD, EVEX, Sched<[SchedRR]>; let mayLoad = 1 in defm m : AVX512_maskable_split, T8PD, EVEX, EVEX_CD8, Sched<[SchedRM]>; } def : Pat<(MaskInfo.VT (bitconvert (DestInfo.VT (UnmaskedOp (SrcInfo.VT (scalar_to_vector (SrcInfo.ScalarLdFrag addr:$src))))))), (!cast(Name#MaskInfo.ZSuffix#m) addr:$src)>; def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask, (bitconvert (DestInfo.VT (X86VBroadcast (SrcInfo.VT (scalar_to_vector (SrcInfo.ScalarLdFrag addr:$src)))))), MaskInfo.RC:$src0)), (!cast(Name#DestInfo.ZSuffix#mk) MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>; def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask, (bitconvert (DestInfo.VT (X86VBroadcast (SrcInfo.VT (scalar_to_vector (SrcInfo.ScalarLdFrag addr:$src)))))), MaskInfo.ImmAllZerosV)), (!cast(Name#MaskInfo.ZSuffix#mkz) MaskInfo.KRCWM:$mask, addr:$src)>; } // Helper class to force mask and broadcast result to same type. multiclass avx512_broadcast_rm opc, string OpcodeStr, string Name, SchedWrite SchedRR, SchedWrite SchedRM, X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> : avx512_broadcast_rm_split; multiclass avx512_fp_broadcast_sd opc, string OpcodeStr, AVX512VLVectorVTInfo _> { let Predicates = [HasAVX512] in { defm Z : avx512_broadcast_rm, avx512_broadcast_scalar, EVEX_V512; } let Predicates = [HasVLX] in { defm Z256 : avx512_broadcast_rm, avx512_broadcast_scalar, EVEX_V256; } } multiclass avx512_fp_broadcast_ss opc, string OpcodeStr, AVX512VLVectorVTInfo _> { let Predicates = [HasAVX512] in { defm Z : avx512_broadcast_rm, avx512_broadcast_scalar, EVEX_V512; } let Predicates = [HasVLX] in { defm Z256 : avx512_broadcast_rm, avx512_broadcast_scalar, EVEX_V256; defm Z128 : avx512_broadcast_rm, avx512_broadcast_scalar, EVEX_V128; } } defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss", avx512vl_f32_info>; defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd", avx512vl_f64_info>, VEX_W; multiclass avx512_int_broadcast_reg opc, SchedWrite SchedRR, X86VectorVTInfo _, SDPatternOperator OpNode, RegisterClass SrcRC> { let ExeDomain = _.ExeDomain in defm r : AVX512_maskable, T8PD, EVEX, Sched<[SchedRR]>; } multiclass avx512_int_broadcastbw_reg opc, string Name, SchedWrite SchedRR, X86VectorVTInfo _, SDPatternOperator OpNode, RegisterClass SrcRC, SubRegIndex Subreg> { let hasSideEffects = 0, ExeDomain = _.ExeDomain in defm r : AVX512_maskable_custom, T8PD, EVEX, Sched<[SchedRR]>; def : Pat <(_.VT (OpNode SrcRC:$src)), (!cast(Name#r) (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0), (!cast(Name#rk) _.RC:$src0, _.KRCWM:$mask, (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV), (!cast(Name#rkz) _.KRCWM:$mask, (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; } multiclass avx512_int_broadcastbw_reg_vl opc, string Name, AVX512VLVectorVTInfo _, SDPatternOperator OpNode, RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> { let Predicates = [prd] in defm Z : avx512_int_broadcastbw_reg, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_int_broadcastbw_reg, EVEX_V256; defm Z128 : avx512_int_broadcastbw_reg, EVEX_V128; } } multiclass avx512_int_broadcast_reg_vl opc, AVX512VLVectorVTInfo _, SDPatternOperator OpNode, RegisterClass SrcRC, Predicate prd> { let Predicates = [prd] in defm Z : avx512_int_broadcast_reg, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_int_broadcast_reg, EVEX_V256; defm Z128 : avx512_int_broadcast_reg, EVEX_V128; } } defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr", avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>; defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr", avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit, HasBWI>; defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info, X86VBroadcast, GR32, HasAVX512>; defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info, X86VBroadcast, GR64, HasAVX512>, VEX_W; // Provide aliases for broadcast from the same register class that // automatically does the extract. multiclass avx512_int_broadcast_rm_lowering { def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))), (!cast(Name#DestInfo.ZSuffix#"r") (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm))>; } multiclass avx512_int_broadcast_rm_vl opc, string OpcodeStr, AVX512VLVectorVTInfo _, Predicate prd> { let Predicates = [prd] in { defm Z : avx512_broadcast_rm, avx512_int_broadcast_rm_lowering, EVEX_V512; // Defined separately to avoid redefinition. defm Z_Alt : avx512_int_broadcast_rm_lowering; } let Predicates = [prd, HasVLX] in { defm Z256 : avx512_broadcast_rm, avx512_int_broadcast_rm_lowering, EVEX_V256; defm Z128 : avx512_broadcast_rm, EVEX_V128; } } defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb", avx512vl_i8_info, HasBWI>; defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw", avx512vl_i16_info, HasBWI>; defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd", avx512vl_i32_info, HasAVX512>; defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq", avx512vl_i64_info, HasAVX512>, VEX_W; multiclass avx512_subvec_broadcast_rm opc, string OpcodeStr, X86VectorVTInfo _Dst, X86VectorVTInfo _Src> { defm rm : AVX512_maskable, Sched<[SchedWriteShuffle.YMM.Folded]>, AVX5128IBase, EVEX; } // This should be used for the AVX512DQ broadcast instructions. It disables // the unmasked patterns so that we only use the DQ instructions when masking // is requested. multiclass avx512_subvec_broadcast_rm_dq opc, string OpcodeStr, X86VectorVTInfo _Dst, X86VectorVTInfo _Src> { let hasSideEffects = 0, mayLoad = 1 in defm rm : AVX512_maskable_split, Sched<[SchedWriteShuffle.YMM.Folded]>, AVX5128IBase, EVEX; } let Predicates = [HasAVX512] in { // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD. def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))), (VPBROADCASTQZm addr:$src)>; } let Predicates = [HasVLX] in { // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD. def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))), (VPBROADCASTQZ128m addr:$src)>; def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))), (VPBROADCASTQZ256m addr:$src)>; } let Predicates = [HasVLX, HasBWI] in { // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. // This means we'll encounter truncated i32 loads; match that here. def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), (VPBROADCASTWZ128m addr:$src)>; def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), (VPBROADCASTWZ256m addr:$src)>; def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (zextloadi16 addr:$src)))))), (VPBROADCASTWZ128m addr:$src)>; def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (zextloadi16 addr:$src)))))), (VPBROADCASTWZ256m addr:$src)>; } //===----------------------------------------------------------------------===// // AVX-512 BROADCAST SUBVECTORS // defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", v16i32_info, v4i32x_info>, EVEX_V512, EVEX_CD8<32, CD8VT4>; defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4", v16f32_info, v4f32x_info>, EVEX_V512, EVEX_CD8<32, CD8VT4>; defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4", v8i64_info, v4i64x_info>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>; defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4", v8f64_info, v4f64x_info>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>; let Predicates = [HasAVX512] in { def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))), (VBROADCASTF64X4rm addr:$src)>; def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))), (VBROADCASTI64X4rm addr:$src)>; def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))), (VBROADCASTI64X4rm addr:$src)>; def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))), (VBROADCASTI64X4rm addr:$src)>; // Provide fallback in case the load node that is used in the patterns above // is used by additional users, which prevents the pattern selection. def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))), (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), (v4f64 VR256X:$src), 1)>; def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))), (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), (v8f32 VR256X:$src), 1)>; def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))), (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), (v4i64 VR256X:$src), 1)>; def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))), (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), (v8i32 VR256X:$src), 1)>; def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))), (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), (v16i16 VR256X:$src), 1)>; def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))), (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), (v32i8 VR256X:$src), 1)>; def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))), (VBROADCASTF32X4rm addr:$src)>; def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))), (VBROADCASTI32X4rm addr:$src)>; def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))), (VBROADCASTI32X4rm addr:$src)>; def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))), (VBROADCASTI32X4rm addr:$src)>; // Patterns for selects of bitcasted operations. def : Pat<(vselect VK16WM:$mask, (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), (bc_v16f32 (v16i32 immAllZerosV))), (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>; def : Pat<(vselect VK16WM:$mask, (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), VR512:$src0), (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>; def : Pat<(vselect VK16WM:$mask, (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))), (v16i32 immAllZerosV)), (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>; def : Pat<(vselect VK16WM:$mask, (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))), VR512:$src0), (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>; def : Pat<(vselect VK8WM:$mask, (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))), (bc_v8f64 (v16i32 immAllZerosV))), (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>; def : Pat<(vselect VK8WM:$mask, (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))), VR512:$src0), (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>; def : Pat<(vselect VK8WM:$mask, (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))), (bc_v8i64 (v16i32 immAllZerosV))), (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>; def : Pat<(vselect VK8WM:$mask, (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))), VR512:$src0), (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>; } let Predicates = [HasVLX] in { defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", v8i32x_info, v4i32x_info>, EVEX_V256, EVEX_CD8<32, CD8VT4>; defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4", v8f32x_info, v4f32x_info>, EVEX_V256, EVEX_CD8<32, CD8VT4>; def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))), (VBROADCASTF32X4Z256rm addr:$src)>; def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))), (VBROADCASTI32X4Z256rm addr:$src)>; def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))), (VBROADCASTI32X4Z256rm addr:$src)>; def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))), (VBROADCASTI32X4Z256rm addr:$src)>; // Patterns for selects of bitcasted operations. def : Pat<(vselect VK8WM:$mask, (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), (bc_v8f32 (v8i32 immAllZerosV))), (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>; def : Pat<(vselect VK8WM:$mask, (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), VR256X:$src0), (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>; def : Pat<(vselect VK8WM:$mask, (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))), (v8i32 immAllZerosV)), (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>; def : Pat<(vselect VK8WM:$mask, (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))), VR256X:$src0), (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>; // Provide fallback in case the load node that is used in the patterns above // is used by additional users, which prevents the pattern selection. def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))), (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), (v2f64 VR128X:$src), 1)>; def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))), (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), (v4f32 VR128X:$src), 1)>; def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))), (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), (v2i64 VR128X:$src), 1)>; def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))), (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), (v4i32 VR128X:$src), 1)>; def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))), (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), (v8i16 VR128X:$src), 1)>; def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))), (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), (v16i8 VR128X:$src), 1)>; } let Predicates = [HasVLX, HasDQI] in { defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2", v4i64x_info, v2i64x_info>, VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>; defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2", v4f64x_info, v2f64x_info>, VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>; // Patterns for selects of bitcasted operations. def : Pat<(vselect VK4WM:$mask, (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))), (bc_v4f64 (v8i32 immAllZerosV))), (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>; def : Pat<(vselect VK4WM:$mask, (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))), VR256X:$src0), (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>; def : Pat<(vselect VK4WM:$mask, (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))), (bc_v4i64 (v8i32 immAllZerosV))), (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>; def : Pat<(vselect VK4WM:$mask, (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))), VR256X:$src0), (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>; } let Predicates = [HasDQI] in { defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2", v8i64_info, v2i64x_info>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>; defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8", v16i32_info, v8i32x_info>, EVEX_V512, EVEX_CD8<32, CD8VT8>; defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2", v8f64_info, v2f64x_info>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>; defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8", v16f32_info, v8f32x_info>, EVEX_V512, EVEX_CD8<32, CD8VT8>; // Patterns for selects of bitcasted operations. def : Pat<(vselect VK16WM:$mask, (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))), (bc_v16f32 (v16i32 immAllZerosV))), (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>; def : Pat<(vselect VK16WM:$mask, (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))), VR512:$src0), (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>; def : Pat<(vselect VK16WM:$mask, (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))), (v16i32 immAllZerosV)), (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>; def : Pat<(vselect VK16WM:$mask, (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))), VR512:$src0), (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>; def : Pat<(vselect VK8WM:$mask, (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))), (bc_v8f64 (v16i32 immAllZerosV))), (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>; def : Pat<(vselect VK8WM:$mask, (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))), VR512:$src0), (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>; def : Pat<(vselect VK8WM:$mask, (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))), (bc_v8i64 (v16i32 immAllZerosV))), (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>; def : Pat<(vselect VK8WM:$mask, (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))), VR512:$src0), (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>; } multiclass avx512_common_broadcast_32x2 opc, string OpcodeStr, AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> { let Predicates = [HasDQI] in defm Z : avx512_broadcast_rm_split, EVEX_V512; let Predicates = [HasDQI, HasVLX] in defm Z256 : avx512_broadcast_rm_split, EVEX_V256; } multiclass avx512_common_broadcast_i32x2 opc, string OpcodeStr, AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> : avx512_common_broadcast_32x2 { let Predicates = [HasDQI, HasVLX] in defm Z128 : avx512_broadcast_rm_split, EVEX_V128; } defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2", avx512vl_i32_info, avx512vl_i64_info>; defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2", avx512vl_f32_info, avx512vl_f64_info>; let Predicates = [HasVLX] in { def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))), (VBROADCASTSSZ256r (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>; def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))), (VBROADCASTSDZ256r (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>; } def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))), (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>; def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))), (VBROADCASTSSZr (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>; def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))), (VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>; def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))), (VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>; //===----------------------------------------------------------------------===// // AVX-512 BROADCAST MASK TO VECTOR REGISTER //--- multiclass avx512_mask_broadcastm opc, string OpcodeStr, X86VectorVTInfo _, RegisterClass KRC> { def rr : AVX512XS8I, EVEX, Sched<[WriteShuffle]>; } multiclass avx512_mask_broadcast opc, string OpcodeStr, AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> { let Predicates = [HasCDI] in defm Z : avx512_mask_broadcastm, EVEX_V512; let Predicates = [HasCDI, HasVLX] in { defm Z256 : avx512_mask_broadcastm, EVEX_V256; defm Z128 : avx512_mask_broadcastm, EVEX_V128; } } defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", avx512vl_i32_info, VK16>; defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", avx512vl_i64_info, VK8>, VEX_W; //===----------------------------------------------------------------------===// // -- VPERMI2 - 3 source operands form -- multiclass avx512_perm_i opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _, X86VectorVTInfo IdxVT> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in { defm rr: AVX512_maskable_3src_cast, EVEX_4V, AVX5128IBase, Sched<[sched]>; let mayLoad = 1 in defm rm: AVX512_maskable_3src_cast, EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_perm_i_mb opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _, X86VectorVTInfo IdxVT> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, mayLoad = 1 in defm rmb: AVX512_maskable_3src_cast, AVX5128IBase, EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_perm_i_sizes opc, string OpcodeStr, X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo, AVX512VLVectorVTInfo ShuffleMask> { defm NAME: avx512_perm_i, avx512_perm_i_mb, EVEX_V512; let Predicates = [HasVLX] in { defm NAME#128: avx512_perm_i, avx512_perm_i_mb, EVEX_V128; defm NAME#256: avx512_perm_i, avx512_perm_i_mb, EVEX_V256; } } multiclass avx512_perm_i_sizes_bw opc, string OpcodeStr, X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo, AVX512VLVectorVTInfo Idx, Predicate Prd> { let Predicates = [Prd] in defm NAME: avx512_perm_i, EVEX_V512; let Predicates = [Prd, HasVLX] in { defm NAME#128: avx512_perm_i, EVEX_V128; defm NAME#256: avx512_perm_i, EVEX_V256; } } defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256, avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256, avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256, avx512vl_i16_info, avx512vl_i16_info, HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>; defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256, avx512vl_i8_info, avx512vl_i8_info, HasVBMI>, EVEX_CD8<8, CD8VF>; defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256, avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256, avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; // Extra patterns to deal with extra bitcasts due to passthru and index being // different types on the fp versions. multiclass avx512_perm_i_lowering { def : Pat<(_.VT (vselect _.KRCWM:$mask, (X86VPermt2 (_.VT _.RC:$src2), (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3), (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), (!cast(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, _.RC:$src3)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (X86VPermt2 _.RC:$src2, (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), (_.LdFrag addr:$src3)), (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), (!cast(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (X86VPermt2 _.RC:$src2, (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), (X86VBroadcast (_.ScalarLdFrag addr:$src3))), (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), (!cast(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3)>; } // TODO: Should we add more casts? The vXi64 case is common due to ABI. defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>; defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>; defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>; // VPERMT2 multiclass avx512_perm_t opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _, X86VectorVTInfo IdxVT> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { defm rr: AVX512_maskable_3src, EVEX_4V, AVX5128IBase, Sched<[sched]>; defm rm: AVX512_maskable_3src, EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_perm_t_mb opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _, X86VectorVTInfo IdxVT> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in defm rmb: AVX512_maskable_3src, AVX5128IBase, EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_perm_t_sizes opc, string OpcodeStr, X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo, AVX512VLVectorVTInfo ShuffleMask> { defm NAME: avx512_perm_t, avx512_perm_t_mb, EVEX_V512; let Predicates = [HasVLX] in { defm NAME#128: avx512_perm_t, avx512_perm_t_mb, EVEX_V128; defm NAME#256: avx512_perm_t, avx512_perm_t_mb, EVEX_V256; } } multiclass avx512_perm_t_sizes_bw opc, string OpcodeStr, X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo, AVX512VLVectorVTInfo Idx, Predicate Prd> { let Predicates = [Prd] in defm NAME: avx512_perm_t, EVEX_V512; let Predicates = [Prd, HasVLX] in { defm NAME#128: avx512_perm_t, EVEX_V128; defm NAME#256: avx512_perm_t, EVEX_V256; } } defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256, avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256, avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256, avx512vl_i16_info, avx512vl_i16_info, HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>; defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256, avx512vl_i8_info, avx512vl_i8_info, HasVBMI>, EVEX_CD8<8, CD8VF>; defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256, avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256, avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; //===----------------------------------------------------------------------===// // AVX-512 - BLEND using mask // multiclass WriteFVarBlendask opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain, hasSideEffects = 0 in { def rr : AVX5128I, EVEX_4V, Sched<[sched]>; def rrk : AVX5128I, EVEX_4V, EVEX_K, Sched<[sched]>; def rrkz : AVX5128I, EVEX_4V, EVEX_KZ, Sched<[sched]>; let mayLoad = 1 in { def rm : AVX5128I, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, ReadAfterLd]>; def rmk : AVX5128I, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, ReadAfterLd]>; def rmkz : AVX5128I, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, ReadAfterLd]>; } } } multiclass WriteFVarBlendask_rmb opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let mayLoad = 1, hasSideEffects = 0 in { def rmbk : AVX5128I, EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, ReadAfterLd]>; def rmbkz : AVX5128I, EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, ReadAfterLd]>; def rmb : AVX5128I, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass blendmask_dq opc, string OpcodeStr, X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> { defm Z : WriteFVarBlendask, WriteFVarBlendask_rmb, EVEX_V512; let Predicates = [HasVLX] in { defm Z256 : WriteFVarBlendask, WriteFVarBlendask_rmb, EVEX_V256; defm Z128 : WriteFVarBlendask, WriteFVarBlendask_rmb, EVEX_V128; } } multiclass blendmask_bw opc, string OpcodeStr, X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> { let Predicates = [HasBWI] in defm Z : WriteFVarBlendask, EVEX_V512; let Predicates = [HasBWI, HasVLX] in { defm Z256 : WriteFVarBlendask, EVEX_V256; defm Z128 : WriteFVarBlendask, EVEX_V128; } } defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend, avx512vl_f32_info>; defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend, avx512vl_f64_info>, VEX_W; defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend, avx512vl_i32_info>; defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend, avx512vl_i64_info>, VEX_W; defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend, avx512vl_i8_info>; defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend, avx512vl_i16_info>, VEX_W; //===----------------------------------------------------------------------===// // Compare Instructions //===----------------------------------------------------------------------===// // avx512_cmp_scalar - AVX512 CMPSS and CMPSD multiclass avx512_cmp_scalar { defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc), "vcmp${cc}"#_.Suffix, "$src2, $src1", "$src1, $src2", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc)>, EVEX_4V, Sched<[sched]>; let mayLoad = 1 in defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc), "vcmp${cc}"#_.Suffix, "$src2, $src1", "$src1, $src2", (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2, imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded, ReadAfterLd]>; defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc), "vcmp${cc}"#_.Suffix, "{sae}, $src2, $src1", "$src1, $src2, {sae}", (OpNodeRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc, (i32 FROUND_NO_EXC))>, EVEX_4V, EVEX_B, Sched<[sched]>; // Accept explicit immediate argument form instead of comparison code. let isAsmParserOnly = 1, hasSideEffects = 0 in { defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, (outs VK1:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), "vcmp"#_.Suffix, "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V, Sched<[sched]>; let mayLoad = 1 in defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), "vcmp"#_.Suffix, "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded, ReadAfterLd]>; defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), "vcmp"#_.Suffix, "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc">, EVEX_4V, EVEX_B, Sched<[sched]>; }// let isAsmParserOnly = 1, hasSideEffects = 0 let isCodeGenOnly = 1 in { let isCommutable = 1 in def rr : AVX512Ii8<0xC2, MRMSrcReg, (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc), !strconcat("vcmp${cc}", _.Suffix, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set _.KRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2, imm:$cc))]>, EVEX_4V, Sched<[sched]>; def rm : AVX512Ii8<0xC2, MRMSrcMem, (outs _.KRC:$dst), (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc), !strconcat("vcmp${cc}", _.Suffix, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set _.KRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2), imm:$cc))]>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded, ReadAfterLd]>; } } let Predicates = [HasAVX512] in { let ExeDomain = SSEPackedSingle in defm VCMPSSZ : avx512_cmp_scalar, AVX512XSIi8Base; let ExeDomain = SSEPackedDouble in defm VCMPSDZ : avx512_cmp_scalar, AVX512XDIi8Base, VEX_W; } multiclass avx512_icmp_packed opc, string OpcodeStr, PatFrag OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, bit IsCommutable> { let isCommutable = IsCommutable in def rr : AVX512BI, EVEX_4V, Sched<[sched]>; def rm : AVX512BI, EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; let isCommutable = IsCommutable in def rrk : AVX512BI, EVEX_4V, EVEX_K, Sched<[sched]>; def rmk : AVX512BI, EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_icmp_packed_rmb opc, string OpcodeStr, PatFrag OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, bit IsCommutable> : avx512_icmp_packed { def rmb : AVX512BI, EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; def rmbk : AVX512BI, EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_icmp_packed_vl opc, string OpcodeStr, PatFrag OpNode, X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo, Predicate prd, bit IsCommutable = 0> { let Predicates = [prd] in defm Z : avx512_icmp_packed, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_icmp_packed, EVEX_V256; defm Z128 : avx512_icmp_packed, EVEX_V128; } } multiclass avx512_icmp_packed_rmb_vl opc, string OpcodeStr, PatFrag OpNode, X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo, Predicate prd, bit IsCommutable = 0> { let Predicates = [prd] in defm Z : avx512_icmp_packed_rmb, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_icmp_packed_rmb, EVEX_V256; defm Z128 : avx512_icmp_packed_rmb, EVEX_V128; } } // This fragment treats X86cmpm as commutable to help match loads in both // operands for PCMPEQ. def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2), (X86cmpm_c node:$src1, node:$src2, (i8 0))>; def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2), (X86cmpm node:$src1, node:$src2, (i8 6))>; // FIXME: Is there a better scheduler class for VPCMP? defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c, SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>, EVEX_CD8<8, CD8VF>, VEX_WIG; defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c, SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>, EVEX_CD8<16, CD8VF>, VEX_WIG; defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c, SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>, EVEX_CD8<32, CD8VF>; defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c, SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>, T8PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm, SchedWriteVecALU, avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>, VEX_WIG; defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm, SchedWriteVecALU, avx512vl_i16_info, HasBWI>, EVEX_CD8<16, CD8VF>, VEX_WIG; defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm, SchedWriteVecALU, avx512vl_i32_info, HasAVX512>, EVEX_CD8<32, CD8VF>; defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm, SchedWriteVecALU, avx512vl_i64_info, HasAVX512>, T8PD, VEX_W, EVEX_CD8<64, CD8VF>; // Transforms to swizzle an immediate to help matching memory operand in first // operand. def CommutePCMPCC : SDNodeXFormgetZExtValue() & 0x7; Imm = X86::getSwappedVPCMPImm(Imm); return getI8Imm(Imm, SDLoc(N)); }]>; multiclass avx512_icmp_cc opc, string Suffix, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, string Name> { let isCommutable = 1 in def rri : AVX512AIi8, EVEX_4V, Sched<[sched]>; def rmi : AVX512AIi8, EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; let isCommutable = 1 in def rrik : AVX512AIi8, EVEX_4V, EVEX_K, Sched<[sched]>; def rmik : AVX512AIi8, EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. let isAsmParserOnly = 1, hasSideEffects = 0 in { def rri_alt : AVX512AIi8, EVEX_4V, Sched<[sched]>; let mayLoad = 1 in def rmi_alt : AVX512AIi8, EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; def rrik_alt : AVX512AIi8, EVEX_4V, EVEX_K, Sched<[sched]>; let mayLoad = 1 in def rmik_alt : AVX512AIi8, EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>; } def : Pat<(OpNode (bitconvert (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), imm:$cc), (!cast(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, (CommutePCMPCC imm:$cc))>; def : Pat<(and _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), imm:$cc)), (!cast(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1, addr:$src2, (CommutePCMPCC imm:$cc))>; } multiclass avx512_icmp_cc_rmb opc, string Suffix, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, string Name> : avx512_icmp_cc { def rmib : AVX512AIi8, EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; def rmibk : AVX512AIi8, EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in { def rmib_alt : AVX512AIi8, EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; def rmibk_alt : AVX512AIi8, EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } def : Pat<(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src2)), (_.VT _.RC:$src1), imm:$cc), (!cast(Name#_.ZSuffix#"rmib") _.RC:$src1, addr:$src2, (CommutePCMPCC imm:$cc))>; def : Pat<(and _.KRCWM:$mask, (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src2)), (_.VT _.RC:$src1), imm:$cc)), (!cast(Name#_.ZSuffix#"rmibk") _.KRCWM:$mask, _.RC:$src1, addr:$src2, (CommutePCMPCC imm:$cc))>; } multiclass avx512_icmp_cc_vl opc, string Suffix, SDNode OpNode, X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo, Predicate prd> { let Predicates = [prd] in defm Z : avx512_icmp_cc, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_icmp_cc, EVEX_V256; defm Z128 : avx512_icmp_cc, EVEX_V128; } } multiclass avx512_icmp_cc_rmb_vl opc, string Suffix, SDNode OpNode, X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo, Predicate prd> { let Predicates = [prd] in defm Z : avx512_icmp_cc_rmb, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_icmp_cc_rmb, EVEX_V256; defm Z128 : avx512_icmp_cc_rmb, EVEX_V128; } } // FIXME: Is there a better scheduler class for VPCMP/VPCMPU? defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, SchedWriteVecALU, avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>; defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, SchedWriteVecALU, avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>; defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, SchedWriteVecALU, avx512vl_i16_info, HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>; defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, SchedWriteVecALU, avx512vl_i16_info, HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>; defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, SchedWriteVecALU, avx512vl_i32_info, HasAVX512>, EVEX_CD8<32, CD8VF>; defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, SchedWriteVecALU, avx512vl_i32_info, HasAVX512>, EVEX_CD8<32, CD8VF>; defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, SchedWriteVecALU, avx512vl_i64_info, HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>; defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, SchedWriteVecALU, avx512vl_i64_info, HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>; multiclass avx512_vcmp_common { defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc), "vcmp${cc}"#_.Suffix, "$src2, $src1", "$src1, $src2", (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc), 1>, Sched<[sched]>; defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc), "vcmp${cc}"#_.Suffix, "$src2, $src1", "$src1, $src2", (X86cmpm (_.VT _.RC:$src1), (_.VT (bitconvert (_.LdFrag addr:$src2))), imm:$cc)>, Sched<[sched.Folded, ReadAfterLd]>; defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc), "vcmp${cc}"#_.Suffix, "${src2}"##_.BroadcastStr##", $src1", "$src1, ${src2}"##_.BroadcastStr, (X86cmpm (_.VT _.RC:$src1), (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), imm:$cc)>, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. let isAsmParserOnly = 1, hasSideEffects = 0 in { defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), "vcmp"#_.Suffix, "$cc, $src2, $src1", "$src1, $src2, $cc">, Sched<[sched]>; let mayLoad = 1 in { defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), "vcmp"#_.Suffix, "$cc, $src2, $src1", "$src1, $src2, $cc">, Sched<[sched.Folded, ReadAfterLd]>; defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), "vcmp"#_.Suffix, "$cc, ${src2}"##_.BroadcastStr##", $src1", "$src1, ${src2}"##_.BroadcastStr##", $cc">, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } } // Patterns for selecting with loads in other operand. def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1), CommutableCMPCC:$cc), (!cast(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, imm:$cc)>; def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1), CommutableCMPCC:$cc)), (!cast(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1, addr:$src2, imm:$cc)>; def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)), (_.VT _.RC:$src1), CommutableCMPCC:$cc), (!cast(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, imm:$cc)>; def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)), (_.VT _.RC:$src1), CommutableCMPCC:$cc)), (!cast(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1, addr:$src2, imm:$cc)>; } multiclass avx512_vcmp_sae { // comparison code form (VCMP[EQ/LT/LE/...] defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc), "vcmp${cc}"#_.Suffix, "{sae}, $src2, $src1", "$src1, $src2, {sae}", (X86cmpmRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc, (i32 FROUND_NO_EXC))>, EVEX_B, Sched<[sched]>; let isAsmParserOnly = 1, hasSideEffects = 0 in { defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), "vcmp"#_.Suffix, "$cc, {sae}, $src2, $src1", "$src1, $src2, {sae}, $cc">, EVEX_B, Sched<[sched]>; } } multiclass avx512_vcmp { let Predicates = [HasAVX512] in { defm Z : avx512_vcmp_common, avx512_vcmp_sae, EVEX_V512; } let Predicates = [HasAVX512,HasVLX] in { defm Z128 : avx512_vcmp_common, EVEX_V128; defm Z256 : avx512_vcmp_common, EVEX_V256; } } defm VCMPPD : avx512_vcmp, AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; defm VCMPPS : avx512_vcmp, AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; // Patterns to select fp compares with load as first operand. let Predicates = [HasAVX512] in { def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, CommutableCMPCC:$cc)), (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>; def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, CommutableCMPCC:$cc)), (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>; } // ---------------------------------------------------------------- // FPClass //handle fpclass instruction mask = op(reg_scalar,imm) // op(mem_scalar,imm) multiclass avx512_scalar_fpclass opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, Predicate prd> { let Predicates = [prd], ExeDomain = _.ExeDomain in { def rr : AVX512, Sched<[sched]>; def rrk : AVX512, EVEX_K, Sched<[sched]>; def rm : AVX512, Sched<[sched.Folded, ReadAfterLd]>; def rmk : AVX512, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>; } } //handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm) // fpclass(reg_vec, mem_vec, imm) // fpclass(reg_vec, broadcast(eltVt), imm) multiclass avx512_vector_fpclass opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, string mem, string broadcast>{ let ExeDomain = _.ExeDomain in { def rr : AVX512, Sched<[sched]>; def rrk : AVX512, EVEX_K, Sched<[sched]>; def rm : AVX512, Sched<[sched.Folded, ReadAfterLd]>; def rmk : AVX512, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>; def rmb : AVX512, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; def rmbk : AVX512, EVEX_B, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_vector_fpclass_all opc, SDNode OpNode, X86SchedWriteWidths sched, Predicate prd, string broadcast>{ let Predicates = [prd] in { defm Z : avx512_vector_fpclass, EVEX_V512; } let Predicates = [prd, HasVLX] in { defm Z128 : avx512_vector_fpclass, EVEX_V128; defm Z256 : avx512_vector_fpclass, EVEX_V256; } } multiclass avx512_fp_fpclass_all opcVec, bits<8> opcScalar, SDNode VecOpNode, SDNode ScalarOpNode, X86SchedWriteWidths sched, Predicate prd> { defm PS : avx512_vector_fpclass_all, EVEX_CD8<32, CD8VF>; defm PD : avx512_vector_fpclass_all, EVEX_CD8<64, CD8VF> , VEX_W; defm SS : avx512_scalar_fpclass, EVEX_CD8<32, CD8VT1>; defm SD : avx512_scalar_fpclass, EVEX_CD8<64, CD8VT1>, VEX_W; } defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass, X86Vfpclasss, SchedWriteFCmp, HasDQI>, AVX512AIi8Base, EVEX; //----------------------------------------------------------------- // Mask register copy, including // - copy between mask registers // - load/store mask registers // - copy from GPR to mask register and vice versa // multiclass avx512_mask_mov opc_kk, bits<8> opc_km, bits<8> opc_mk, string OpcodeStr, RegisterClass KRC, ValueType vvt, X86MemOperand x86memop> { let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in def kk : I, Sched<[WriteMove]>; def km : I, Sched<[WriteLoad]>; def mk : I, Sched<[WriteStore]>; } multiclass avx512_mask_mov_gpr opc_kr, bits<8> opc_rk, string OpcodeStr, RegisterClass KRC, RegisterClass GRC> { let hasSideEffects = 0 in { def kr : I, Sched<[WriteMove]>; def rk : I, Sched<[WriteMove]>; } } let Predicates = [HasDQI] in defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>, avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>, VEX, PD; let Predicates = [HasAVX512] in defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>, avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>, VEX, PS; let Predicates = [HasBWI] in { defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>, VEX, PD, VEX_W; defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>, VEX, XD; defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>, VEX, PS, VEX_W; defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>, VEX, XD, VEX_W; } // GR from/to mask register def : Pat<(v16i1 (bitconvert (i16 GR16:$src))), (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>; def : Pat<(i16 (bitconvert (v16i1 VK16:$src))), (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>; def : Pat<(v8i1 (bitconvert (i8 GR8:$src))), (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>; def : Pat<(i8 (bitconvert (v8i1 VK8:$src))), (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>; def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))), (KMOVWrk VK16:$src)>; def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))), (COPY_TO_REGCLASS VK16:$src, GR32)>; def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))), (KMOVBrk VK8:$src)>, Requires<[HasDQI]>; def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))), (COPY_TO_REGCLASS VK8:$src, GR32)>; def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), (COPY_TO_REGCLASS GR32:$src, VK32)>; def : Pat<(i32 (bitconvert (v32i1 VK32:$src))), (COPY_TO_REGCLASS VK32:$src, GR32)>; def : Pat<(v64i1 (bitconvert (i64 GR64:$src))), (COPY_TO_REGCLASS GR64:$src, VK64)>; def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), (COPY_TO_REGCLASS VK64:$src, GR64)>; // Load/store kreg let Predicates = [HasDQI] in { def : Pat<(store VK1:$src, addr:$dst), (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>; def : Pat<(v1i1 (load addr:$src)), (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>; def : Pat<(v2i1 (load addr:$src)), (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>; def : Pat<(v4i1 (load addr:$src)), (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>; } let Predicates = [HasAVX512] in { def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))), (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>; } let Predicates = [HasAVX512] in { multiclass operation_gpr_mask_copy_lowering { def : Pat<(maskVT (scalar_to_vector GR32:$src)), (COPY_TO_REGCLASS GR32:$src, maskRC)>; def : Pat<(maskVT (scalar_to_vector GR8:$src)), (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>; } defm : operation_gpr_mask_copy_lowering; defm : operation_gpr_mask_copy_lowering; defm : operation_gpr_mask_copy_lowering; defm : operation_gpr_mask_copy_lowering; defm : operation_gpr_mask_copy_lowering; defm : operation_gpr_mask_copy_lowering; defm : operation_gpr_mask_copy_lowering; def : Pat<(insert_subvector (v16i1 immAllZerosV), (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)), (COPY_TO_REGCLASS (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), (i32 1))), VK16)>; } // Mask unary operation // - KNOT multiclass avx512_mask_unop opc, string OpcodeStr, RegisterClass KRC, SDPatternOperator OpNode, X86FoldableSchedWrite sched, Predicate prd> { let Predicates = [prd] in def rr : I, Sched<[sched]>; } multiclass avx512_mask_unop_all opc, string OpcodeStr, SDPatternOperator OpNode, X86FoldableSchedWrite sched> { defm B : avx512_mask_unop, VEX, PD; defm W : avx512_mask_unop, VEX, PS; defm D : avx512_mask_unop, VEX, PD, VEX_W; defm Q : avx512_mask_unop, VEX, PS, VEX_W; } // TODO - do we need a X86SchedWriteWidths::KMASK type? defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>; // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit let Predicates = [HasAVX512, NoDQI] in def : Pat<(vnot VK8:$src), (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>; def : Pat<(vnot VK4:$src), (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>; def : Pat<(vnot VK2:$src), (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>; // Mask binary operation // - KAND, KANDN, KOR, KXNOR, KXOR multiclass avx512_mask_binop opc, string OpcodeStr, RegisterClass KRC, SDPatternOperator OpNode, X86FoldableSchedWrite sched, Predicate prd, bit IsCommutable> { let Predicates = [prd], isCommutable = IsCommutable in def rr : I, Sched<[sched]>; } multiclass avx512_mask_binop_all opc, string OpcodeStr, SDPatternOperator OpNode, X86FoldableSchedWrite sched, bit IsCommutable, Predicate prdW = HasAVX512> { defm B : avx512_mask_binop, VEX_4V, VEX_L, PD; defm W : avx512_mask_binop, VEX_4V, VEX_L, PS; defm D : avx512_mask_binop, VEX_4V, VEX_L, VEX_W, PD; defm Q : avx512_mask_binop, VEX_4V, VEX_L, VEX_W, PS; } def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>; def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>; // These nodes use 'vnot' instead of 'not' to support vectors. def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>; def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>; // TODO - do we need a X86SchedWriteWidths::KMASK type? defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>; defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>; defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>; defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>; defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>; defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>; multiclass avx512_binop_pat { // With AVX512F, 8-bit mask is promoted to 16-bit mask, // for the DQI set, this type is legal and KxxxB instruction is used let Predicates = [NoDQI] in def : Pat<(VOpNode VK8:$src1, VK8:$src2), (COPY_TO_REGCLASS (Inst (COPY_TO_REGCLASS VK8:$src1, VK16), (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>; // All types smaller than 8 bits require conversion anyway def : Pat<(OpNode VK1:$src1, VK1:$src2), (COPY_TO_REGCLASS (Inst (COPY_TO_REGCLASS VK1:$src1, VK16), (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; def : Pat<(VOpNode VK2:$src1, VK2:$src2), (COPY_TO_REGCLASS (Inst (COPY_TO_REGCLASS VK2:$src1, VK16), (COPY_TO_REGCLASS VK2:$src2, VK16)), VK1)>; def : Pat<(VOpNode VK4:$src1, VK4:$src2), (COPY_TO_REGCLASS (Inst (COPY_TO_REGCLASS VK4:$src1, VK16), (COPY_TO_REGCLASS VK4:$src2, VK16)), VK1)>; } defm : avx512_binop_pat; defm : avx512_binop_pat; defm : avx512_binop_pat; defm : avx512_binop_pat; defm : avx512_binop_pat; // Mask unpacking multiclass avx512_mask_unpck { let Predicates = [prd] in { let hasSideEffects = 0 in def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2), "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX_4V, VEX_L, Sched<[sched]>; def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)), (!cast(NAME##rr) (COPY_TO_REGCLASS KRCSrc:$src2, KRC), (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>; } } defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, WriteShuffle, HasAVX512>, PD; defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, WriteShuffle, HasBWI>, PS; defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, WriteShuffle, HasBWI>, PS, VEX_W; // Mask bit testing multiclass avx512_mask_testop opc, string OpcodeStr, RegisterClass KRC, SDNode OpNode, X86FoldableSchedWrite sched, Predicate prd> { let Predicates = [prd], Defs = [EFLAGS] in def rr : I, Sched<[sched]>; } multiclass avx512_mask_testop_w opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, Predicate prdW = HasAVX512> { defm B : avx512_mask_testop, VEX, PD; defm W : avx512_mask_testop, VEX, PS; defm Q : avx512_mask_testop, VEX, PS, VEX_W; defm D : avx512_mask_testop, VEX, PD, VEX_W; } // TODO - do we need a X86SchedWriteWidths::KMASK type? defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>; defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>; // Mask shift multiclass avx512_mask_shiftop opc, string OpcodeStr, RegisterClass KRC, SDNode OpNode, X86FoldableSchedWrite sched> { let Predicates = [HasAVX512] in def ri : Ii8, Sched<[sched]>; } multiclass avx512_mask_shiftop_w opc1, bits<8> opc2, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched> { defm W : avx512_mask_shiftop, VEX, TAPD, VEX_W; let Predicates = [HasDQI] in defm B : avx512_mask_shiftop, VEX, TAPD; let Predicates = [HasBWI] in { defm Q : avx512_mask_shiftop, VEX, TAPD, VEX_W; defm D : avx512_mask_shiftop, VEX, TAPD; } } defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>; defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>; multiclass axv512_icmp_packed_no_vlx_lowering { def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1), (Narrow.VT Narrow.RC:$src2))), (COPY_TO_REGCLASS (!cast(InstStr#"Zrr") (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))), Narrow.KRC)>; def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, (Frag (Narrow.VT Narrow.RC:$src1), (Narrow.VT Narrow.RC:$src2)))), (COPY_TO_REGCLASS (!cast(InstStr#"Zrrk") (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))), Narrow.KRC)>; } multiclass axv512_icmp_packed_cc_no_vlx_lowering { def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1), (Narrow.VT Narrow.RC:$src2), imm:$cc)), (COPY_TO_REGCLASS (!cast(InstStr##Zrri) (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), imm:$cc), Narrow.KRC)>; def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, (OpNode (Narrow.VT Narrow.RC:$src1), (Narrow.VT Narrow.RC:$src2), imm:$cc))), (COPY_TO_REGCLASS (!cast(InstStr##Zrrik) (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), imm:$cc), Narrow.KRC)>; } let Predicates = [HasAVX512, NoVLX] in { defm : axv512_icmp_packed_no_vlx_lowering; defm : axv512_icmp_packed_no_vlx_lowering; defm : axv512_icmp_packed_no_vlx_lowering; defm : axv512_icmp_packed_no_vlx_lowering; defm : axv512_icmp_packed_no_vlx_lowering; defm : axv512_icmp_packed_no_vlx_lowering; defm : axv512_icmp_packed_no_vlx_lowering; defm : axv512_icmp_packed_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; } let Predicates = [HasBWI, NoVLX] in { defm : axv512_icmp_packed_no_vlx_lowering; defm : axv512_icmp_packed_no_vlx_lowering; defm : axv512_icmp_packed_no_vlx_lowering; defm : axv512_icmp_packed_no_vlx_lowering; defm : axv512_icmp_packed_no_vlx_lowering; defm : axv512_icmp_packed_no_vlx_lowering; defm : axv512_icmp_packed_no_vlx_lowering; defm : axv512_icmp_packed_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; } // Mask setting all 0s or 1s multiclass avx512_mask_setop { let Predicates = [HasAVX512] in let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1, SchedRW = [WriteZero] in def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "", [(set KRC:$dst, (VT Val))]>; } multiclass avx512_mask_setop_w { defm W : avx512_mask_setop; defm D : avx512_mask_setop; defm Q : avx512_mask_setop; } defm KSET0 : avx512_mask_setop_w; defm KSET1 : avx512_mask_setop_w; // With AVX-512 only, 8-bit mask is promoted to 16-bit mask. let Predicates = [HasAVX512] in { def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>; def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>; def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>; def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>; def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>; def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>; def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>; def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>; } // Patterns for kmask insert_subvector/extract_subvector to/from index=0 multiclass operation_subvector_mask_lowering { def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))), (subVT (COPY_TO_REGCLASS RC:$src, subRC))>; def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))), (VT (COPY_TO_REGCLASS subRC:$src, RC))>; } defm : operation_subvector_mask_lowering; defm : operation_subvector_mask_lowering; defm : operation_subvector_mask_lowering; defm : operation_subvector_mask_lowering; defm : operation_subvector_mask_lowering; defm : operation_subvector_mask_lowering; defm : operation_subvector_mask_lowering; defm : operation_subvector_mask_lowering; defm : operation_subvector_mask_lowering; defm : operation_subvector_mask_lowering; defm : operation_subvector_mask_lowering; defm : operation_subvector_mask_lowering; defm : operation_subvector_mask_lowering; defm : operation_subvector_mask_lowering; defm : operation_subvector_mask_lowering; defm : operation_subvector_mask_lowering; defm : operation_subvector_mask_lowering; defm : operation_subvector_mask_lowering; defm : operation_subvector_mask_lowering; defm : operation_subvector_mask_lowering; defm : operation_subvector_mask_lowering; //===----------------------------------------------------------------------===// // AVX-512 - Aligned and unaligned load and store // multiclass avx512_load opc, string OpcodeStr, string Name, X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload, X86SchedWriteMoveLS Sched, bit NoRMPattern = 0, SDPatternOperator SelectOprr = vselect> { let hasSideEffects = 0 in { let isMoveReg = 1 in def rr : AVX512PI, EVEX, Sched<[Sched.RR]>; def rrkz : AVX512PI, EVEX, EVEX_KZ, Sched<[Sched.RR]>; let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in def rm : AVX512PI, EVEX, Sched<[Sched.RM]>; let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in { def rrk : AVX512PI, EVEX, EVEX_K, Sched<[Sched.RR]>; def rmk : AVX512PI, EVEX, EVEX_K, Sched<[Sched.RM]>; } def rmkz : AVX512PI, EVEX, EVEX_KZ, Sched<[Sched.RM]>; } def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)), (!cast(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>; def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)), (!cast(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>; def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))), (!cast(Name#_.ZSuffix##rmk) _.RC:$src0, _.KRCWM:$mask, addr:$ptr)>; } multiclass avx512_alignedload_vl opc, string OpcodeStr, AVX512VLVectorVTInfo _, Predicate prd, X86SchedWriteMoveLSWidths Sched, bit NoRMPattern = 0> { let Predicates = [prd] in defm Z : avx512_load, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_load, EVEX_V256; defm Z128 : avx512_load, EVEX_V128; } } multiclass avx512_load_vl opc, string OpcodeStr, AVX512VLVectorVTInfo _, Predicate prd, X86SchedWriteMoveLSWidths Sched, bit NoRMPattern = 0, SDPatternOperator SelectOprr = vselect> { let Predicates = [prd] in defm Z : avx512_load, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_load, EVEX_V256; defm Z128 : avx512_load, EVEX_V128; } } multiclass avx512_store opc, string OpcodeStr, string BaseName, X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore, string Name, X86SchedWriteMoveLS Sched, bit NoMRPattern = 0> { let hasSideEffects = 0 in { let isMoveReg = 1 in def rr_REV : AVX512PI, EVEX, FoldGenData, Sched<[Sched.RR]>; def rrk_REV : AVX512PI, EVEX, EVEX_K, FoldGenData, Sched<[Sched.RR]>; def rrkz_REV : AVX512PI, EVEX, EVEX_KZ, FoldGenData, Sched<[Sched.RR]>; } let hasSideEffects = 0, mayStore = 1 in def mr : AVX512PI, EVEX, Sched<[Sched.MR]>; def mrk : AVX512PI, EVEX, EVEX_K, Sched<[Sched.MR]>; def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)), (!cast(BaseName#_.ZSuffix##mrk) addr:$ptr, _.KRCWM:$mask, _.RC:$src)>; } multiclass avx512_store_vl< bits<8> opc, string OpcodeStr, AVX512VLVectorVTInfo _, Predicate prd, string Name, X86SchedWriteMoveLSWidths Sched, bit NoMRPattern = 0> { let Predicates = [prd] in defm Z : avx512_store, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_store, EVEX_V256; defm Z128 : avx512_store, EVEX_V128; } } multiclass avx512_alignedstore_vl opc, string OpcodeStr, AVX512VLVectorVTInfo _, Predicate prd, string Name, X86SchedWriteMoveLSWidths Sched, bit NoMRPattern = 0> { let Predicates = [prd] in defm Z : avx512_store, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_store, EVEX_V256; defm Z128 : avx512_store, EVEX_V128; } } defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info, HasAVX512, SchedWriteFMoveLS>, avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info, HasAVX512, "VMOVAPS", SchedWriteFMoveLS>, PS, EVEX_CD8<32, CD8VF>; defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info, HasAVX512, SchedWriteFMoveLS>, avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info, HasAVX512, "VMOVAPD", SchedWriteFMoveLS>, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512, SchedWriteFMoveLS, 0, null_frag>, avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512, "VMOVUPS", SchedWriteFMoveLS>, PS, EVEX_CD8<32, CD8VF>; defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, SchedWriteFMoveLS, 0, null_frag>, avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512, "VMOVUPD", SchedWriteFMoveLS>, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, HasAVX512, SchedWriteVecMoveLS, 1>, avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, HasAVX512, "VMOVDQA32", SchedWriteVecMoveLS, 1>, PD, EVEX_CD8<32, CD8VF>; defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, HasAVX512, SchedWriteVecMoveLS>, avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info, HasAVX512, "VMOVDQA64", SchedWriteVecMoveLS>, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, SchedWriteVecMoveLS, 1>, avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI, "VMOVDQU8", SchedWriteVecMoveLS, 1>, XD, EVEX_CD8<8, CD8VF>; defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, SchedWriteVecMoveLS, 1>, avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI, "VMOVDQU16", SchedWriteVecMoveLS, 1>, XD, VEX_W, EVEX_CD8<16, CD8VF>; defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512, SchedWriteVecMoveLS, 1, null_frag>, avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512, "VMOVDQU32", SchedWriteVecMoveLS, 1>, XS, EVEX_CD8<32, CD8VF>; defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512, SchedWriteVecMoveLS, 0, null_frag>, avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512, "VMOVDQU64", SchedWriteVecMoveLS>, XS, VEX_W, EVEX_CD8<64, CD8VF>; // Special instructions to help with spilling when we don't have VLX. We need // to load or store from a ZMM register instead. These are converted in // expandPostRAPseudos. let isReMaterializable = 1, canFoldAsLoad = 1, isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in { def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src), "", []>, Sched<[WriteFLoadX]>; def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src), "", []>, Sched<[WriteFLoadY]>; def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src), "", []>, Sched<[WriteFLoadX]>; def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src), "", []>, Sched<[WriteFLoadY]>; } let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in { def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src), "", []>, Sched<[WriteFStoreX]>; def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src), "", []>, Sched<[WriteFStoreY]>; def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src), "", []>, Sched<[WriteFStoreX]>; def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src), "", []>, Sched<[WriteFStoreY]>; } def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)), (v8i64 VR512:$src))), (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)), VK8), VR512:$src)>; def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV), (v16i32 VR512:$src))), (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>; // These patterns exist to prevent the above patterns from introducing a second // mask inversion when one already exists. def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)), (bc_v8i64 (v16i32 immAllZerosV)), (v8i64 VR512:$src))), (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>; def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)), (v16i32 immAllZerosV), (v16i32 VR512:$src))), (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>; multiclass mask_move_lowering { def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask), Narrow.RC:$src1, Narrow.RC:$src0)), (EXTRACT_SUBREG (Wide.VT (!cast(InstrStr#"rrk") (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)), (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))), Narrow.SubRegIdx)>; def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask), Narrow.RC:$src1, Narrow.ImmAllZerosV)), (EXTRACT_SUBREG (Wide.VT (!cast(InstrStr#"rrkz") (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))), Narrow.SubRegIdx)>; } // Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't // available. Use a 512-bit operation and extract. let Predicates = [HasAVX512, NoVLX] in { defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>; defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>; defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>; defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>; defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>; defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>; defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>; defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>; } let Predicates = [HasBWI, NoVLX] in { defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>; defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>; defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>; defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>; } let Predicates = [HasAVX512] in { // 512-bit store. def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst), (VMOVDQA64Zmr addr:$dst, VR512:$src)>; def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst), (VMOVDQA64Zmr addr:$dst, VR512:$src)>; def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst), (VMOVDQA64Zmr addr:$dst, VR512:$src)>; def : Pat<(store (v16i32 VR512:$src), addr:$dst), (VMOVDQU64Zmr addr:$dst, VR512:$src)>; def : Pat<(store (v32i16 VR512:$src), addr:$dst), (VMOVDQU64Zmr addr:$dst, VR512:$src)>; def : Pat<(store (v64i8 VR512:$src), addr:$dst), (VMOVDQU64Zmr addr:$dst, VR512:$src)>; } let Predicates = [HasVLX] in { // 128-bit store. def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst), (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst), (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst), (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; def : Pat<(store (v4i32 VR128X:$src), addr:$dst), (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; def : Pat<(store (v8i16 VR128X:$src), addr:$dst), (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; def : Pat<(store (v16i8 VR128X:$src), addr:$dst), (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; // 256-bit store. def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst), (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst), (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst), (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; def : Pat<(store (v8i32 VR256X:$src), addr:$dst), (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; def : Pat<(store (v16i16 VR256X:$src), addr:$dst), (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; def : Pat<(store (v32i8 VR256X:$src), addr:$dst), (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; } multiclass masked_move_for_extract { def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask, (bitconvert (To.VT (extract_subvector (From.VT From.RC:$src), (iPTR 0)))), To.RC:$src0)), (Cast.VT (!cast(InstrStr#"rrk") Cast.RC:$src0, Cast.KRCWM:$mask, (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx)))>; def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask, (bitconvert (To.VT (extract_subvector (From.VT From.RC:$src), (iPTR 0)))), Cast.ImmAllZerosV)), (Cast.VT (!cast(InstrStr#"rrkz") Cast.KRCWM:$mask, (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx)))>; } let Predicates = [HasVLX] in { // A masked extract from the first 128-bits of a 256-bit vector can be // implemented with masked move. defm : masked_move_for_extract<"VMOVDQA64Z128", v4i64x_info, v2i64x_info, v2i64x_info>; defm : masked_move_for_extract<"VMOVDQA64Z128", v8i32x_info, v4i32x_info, v2i64x_info>; defm : masked_move_for_extract<"VMOVDQA64Z128", v16i16x_info, v8i16x_info, v2i64x_info>; defm : masked_move_for_extract<"VMOVDQA64Z128", v32i8x_info, v16i8x_info, v2i64x_info>; defm : masked_move_for_extract<"VMOVDQA32Z128", v4i64x_info, v2i64x_info, v4i32x_info>; defm : masked_move_for_extract<"VMOVDQA32Z128", v8i32x_info, v4i32x_info, v4i32x_info>; defm : masked_move_for_extract<"VMOVDQA32Z128", v16i16x_info, v8i16x_info, v4i32x_info>; defm : masked_move_for_extract<"VMOVDQA32Z128", v32i8x_info, v16i8x_info, v4i32x_info>; defm : masked_move_for_extract<"VMOVAPDZ128", v4f64x_info, v2f64x_info, v2f64x_info>; defm : masked_move_for_extract<"VMOVAPDZ128", v8f32x_info, v4f32x_info, v2f64x_info>; defm : masked_move_for_extract<"VMOVAPSZ128", v4f64x_info, v2f64x_info, v4f32x_info>; defm : masked_move_for_extract<"VMOVAPSZ128", v8f32x_info, v4f32x_info, v4f32x_info>; // A masked extract from the first 128-bits of a 512-bit vector can be // implemented with masked move. defm : masked_move_for_extract<"VMOVDQA64Z128", v8i64_info, v2i64x_info, v2i64x_info>; defm : masked_move_for_extract<"VMOVDQA64Z128", v16i32_info, v4i32x_info, v2i64x_info>; defm : masked_move_for_extract<"VMOVDQA64Z128", v32i16_info, v8i16x_info, v2i64x_info>; defm : masked_move_for_extract<"VMOVDQA64Z128", v64i8_info, v16i8x_info, v2i64x_info>; defm : masked_move_for_extract<"VMOVDQA32Z128", v8i64_info, v2i64x_info, v4i32x_info>; defm : masked_move_for_extract<"VMOVDQA32Z128", v16i32_info, v4i32x_info, v4i32x_info>; defm : masked_move_for_extract<"VMOVDQA32Z128", v32i16_info, v8i16x_info, v4i32x_info>; defm : masked_move_for_extract<"VMOVDQA32Z128", v64i8_info, v16i8x_info, v4i32x_info>; defm : masked_move_for_extract<"VMOVAPDZ128", v8f64_info, v2f64x_info, v2f64x_info>; defm : masked_move_for_extract<"VMOVAPDZ128", v16f32_info, v4f32x_info, v2f64x_info>; defm : masked_move_for_extract<"VMOVAPSZ128", v8f64_info, v2f64x_info, v4f32x_info>; defm : masked_move_for_extract<"VMOVAPSZ128", v16f32_info, v4f32x_info, v4f32x_info>; // A masked extract from the first 256-bits of a 512-bit vector can be // implemented with masked move. defm : masked_move_for_extract<"VMOVDQA64Z256", v8i64_info, v4i64x_info, v4i64x_info>; defm : masked_move_for_extract<"VMOVDQA64Z256", v16i32_info, v8i32x_info, v4i64x_info>; defm : masked_move_for_extract<"VMOVDQA64Z256", v32i16_info, v16i16x_info, v4i64x_info>; defm : masked_move_for_extract<"VMOVDQA64Z256", v64i8_info, v32i8x_info, v4i64x_info>; defm : masked_move_for_extract<"VMOVDQA32Z256", v8i64_info, v4i64x_info, v8i32x_info>; defm : masked_move_for_extract<"VMOVDQA32Z256", v16i32_info, v8i32x_info, v8i32x_info>; defm : masked_move_for_extract<"VMOVDQA32Z256", v32i16_info, v16i16x_info, v8i32x_info>; defm : masked_move_for_extract<"VMOVDQA32Z256", v64i8_info, v32i8x_info, v8i32x_info>; defm : masked_move_for_extract<"VMOVAPDZ256", v8f64_info, v4f64x_info, v4f64x_info>; defm : masked_move_for_extract<"VMOVAPDZ256", v16f32_info, v8f32x_info, v4f64x_info>; defm : masked_move_for_extract<"VMOVAPSZ256", v8f64_info, v4f64x_info, v8f32x_info>; defm : masked_move_for_extract<"VMOVAPSZ256", v16f32_info, v8f32x_info, v8f32x_info>; } // Move Int Doubleword to Packed Double Int // let ExeDomain = SSEPackedInt in { def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v4i32 (scalar_to_vector GR32:$src)))]>, EVEX, Sched<[WriteVecMoveFromGpr]>; def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>, EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>; def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v2i64 (scalar_to_vector GR64:$src)))]>, EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", []>, EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>; let isCodeGenOnly = 1 in { def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set FR64X:$dst, (bitconvert GR64:$src))]>, EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>, EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>; def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64X:$src))]>, EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src), "vmovq\t{$src, $dst|$dst, $src}", [(store (i64 (bitconvert FR64X:$src)), addr:$dst)]>, EVEX, VEX_W, Sched<[WriteVecStore]>, EVEX_CD8<64, CD8VT1>; } } // ExeDomain = SSEPackedInt // Move Int Doubleword to Single Scalar // let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set FR32X:$dst, (bitconvert GR32:$src))]>, EVEX, Sched<[WriteVecMoveFromGpr]>; def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))]>, EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>; } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 // Move doubleword from xmm register to r/m32 // let ExeDomain = SSEPackedInt in { def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (extractelt (v4i32 VR128X:$src), (iPTR 0)))]>, EVEX, Sched<[WriteVecMoveToGpr]>; def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128X:$src), "vmovd\t{$src, $dst|$dst, $src}", [(store (i32 (extractelt (v4i32 VR128X:$src), (iPTR 0))), addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>; } // ExeDomain = SSEPackedInt // Move quadword from xmm1 register to r/m64 // let ExeDomain = SSEPackedInt in { def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (extractelt (v2i64 VR128X:$src), (iPTR 0)))]>, PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>, Requires<[HasAVX512, In64BitMode]>; let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src), "vmovq\t{$src, $dst|$dst, $src}", []>, PD, EVEX, VEX_W, Sched<[WriteVecStore]>, Requires<[HasAVX512, In64BitMode]>; def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src), "vmovq\t{$src, $dst|$dst, $src}", [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), addr:$dst)]>, EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>, Requires<[HasAVX512, In64BitMode]>; let hasSideEffects = 0 in def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst), (ins VR128X:$src), "vmovq.s\t{$src, $dst|$dst, $src}", []>, EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>; } // ExeDomain = SSEPackedInt // Move Scalar Single to Double Int // let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32X:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32X:$src))]>, EVEX, Sched<[WriteVecMoveToGpr]>; def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32X:$src), "vmovd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32X:$src)), addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>; } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 // Move Quadword Int to Packed Quadword Int // let ExeDomain = SSEPackedInt in { def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>; } // ExeDomain = SSEPackedInt // Allow "vmovd" but print "vmovq". def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>; def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>; //===----------------------------------------------------------------------===// // AVX-512 MOVSS, MOVSD //===----------------------------------------------------------------------===// multiclass avx512_move_scalar { def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))], _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>; def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|", "$dst {${mask}} {z}, $src1, $src2}"), [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask, (_.VT (OpNode _.RC:$src1, _.RC:$src2)), _.ImmAllZerosV)))], _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>; let Constraints = "$src0 = $dst" in def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|", "$dst {${mask}}, $src1, $src2}"), [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask, (_.VT (OpNode _.RC:$src1, _.RC:$src2)), (_.VT _.RC:$src0))))], _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>; let canFoldAsLoad = 1, isReMaterializable = 1 in def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))], _.ExeDomain>, EVEX, Sched<[WriteFLoad]>; let mayLoad = 1, hasSideEffects = 0 in { let Constraints = "$src0 = $dst" in def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src), !strconcat(asm, "\t{$src, $dst {${mask}}|", "$dst {${mask}}, $src}"), [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>; def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.ScalarMemOp:$src), !strconcat(asm, "\t{$src, $dst {${mask}} {z}|", "$dst {${mask}} {z}, $src}"), [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>; } def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [(store _.FRC:$src, addr:$dst)], _.ExeDomain>, EVEX, Sched<[WriteFStore]>; let mayStore = 1, hasSideEffects = 0 in def mrk: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src), !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"), [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>; } defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>, VEX_LIG, XS, EVEX_CD8<32, CD8VT1>; defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>, VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>; multiclass avx512_move_scalar_lowering { def : Pat<(_.VT (OpNode _.RC:$src0, (_.VT (scalar_to_vector (_.EltVT (X86selects VK1WM:$mask, (_.EltVT _.FRC:$src1), (_.EltVT _.FRC:$src2))))))), (!cast(InstrStr#rrk) (COPY_TO_REGCLASS _.FRC:$src2, _.RC), VK1WM:$mask, (_.VT _.RC:$src0), (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>; def : Pat<(_.VT (OpNode _.RC:$src0, (_.VT (scalar_to_vector (_.EltVT (X86selects VK1WM:$mask, (_.EltVT _.FRC:$src1), (_.EltVT ZeroFP))))))), (!cast(InstrStr#rrkz) VK1WM:$mask, (_.VT _.RC:$src0), (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>; } multiclass avx512_store_scalar_lowering { def : Pat<(masked_store addr:$dst, Mask, (_.info512.VT (insert_subvector undef, (_.info128.VT _.info128.RC:$src), (iPTR 0)))), (!cast(InstrStr#mrk) addr:$dst, (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>; } multiclass avx512_store_scalar_lowering_subreg { def : Pat<(masked_store addr:$dst, Mask, (_.info512.VT (insert_subvector undef, (_.info128.VT _.info128.RC:$src), (iPTR 0)))), (!cast(InstrStr#mrk) addr:$dst, (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>; } // This matches the more recent codegen from clang that avoids emitting a 512 // bit masked store directly. Codegen will widen 128-bit masked store to 512 // bits on AVX512F only targets. multiclass avx512_store_scalar_lowering_subreg2 { // AVX512F pattern. def : Pat<(masked_store addr:$dst, Mask512, (_.info512.VT (insert_subvector undef, (_.info128.VT _.info128.RC:$src), (iPTR 0)))), (!cast(InstrStr#mrk) addr:$dst, (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>; // AVX512VL pattern. def : Pat<(masked_store addr:$dst, Mask128, (_.info128.VT _.info128.RC:$src)), (!cast(InstrStr#mrk) addr:$dst, (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>; } multiclass avx512_load_scalar_lowering { def : Pat<(_.info128.VT (extract_subvector (_.info512.VT (masked_load addr:$srcAddr, Mask, (_.info512.VT (bitconvert (v16i32 immAllZerosV))))), (iPTR 0))), (!cast(InstrStr#rmkz) (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), addr:$srcAddr)>; def : Pat<(_.info128.VT (extract_subvector (_.info512.VT (masked_load addr:$srcAddr, Mask, (_.info512.VT (insert_subvector undef, (_.info128.VT (X86vzmovl _.info128.RC:$src)), (iPTR 0))))), (iPTR 0))), (!cast(InstrStr#rmk) _.info128.RC:$src, (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), addr:$srcAddr)>; } multiclass avx512_load_scalar_lowering_subreg { def : Pat<(_.info128.VT (extract_subvector (_.info512.VT (masked_load addr:$srcAddr, Mask, (_.info512.VT (bitconvert (v16i32 immAllZerosV))))), (iPTR 0))), (!cast(InstrStr#rmkz) (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), addr:$srcAddr)>; def : Pat<(_.info128.VT (extract_subvector (_.info512.VT (masked_load addr:$srcAddr, Mask, (_.info512.VT (insert_subvector undef, (_.info128.VT (X86vzmovl _.info128.RC:$src)), (iPTR 0))))), (iPTR 0))), (!cast(InstrStr#rmk) _.info128.RC:$src, (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), addr:$srcAddr)>; } // This matches the more recent codegen from clang that avoids emitting a 512 // bit masked load directly. Codegen will widen 128-bit masked load to 512 // bits on AVX512F only targets. multiclass avx512_load_scalar_lowering_subreg2 { // AVX512F patterns. def : Pat<(_.info128.VT (extract_subvector (_.info512.VT (masked_load addr:$srcAddr, Mask512, (_.info512.VT (bitconvert (v16i32 immAllZerosV))))), (iPTR 0))), (!cast(InstrStr#rmkz) (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), addr:$srcAddr)>; def : Pat<(_.info128.VT (extract_subvector (_.info512.VT (masked_load addr:$srcAddr, Mask512, (_.info512.VT (insert_subvector undef, (_.info128.VT (X86vzmovl _.info128.RC:$src)), (iPTR 0))))), (iPTR 0))), (!cast(InstrStr#rmk) _.info128.RC:$src, (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), addr:$srcAddr)>; // AVX512Vl patterns. def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128, (_.info128.VT (bitconvert (v4i32 immAllZerosV))))), (!cast(InstrStr#rmkz) (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), addr:$srcAddr)>; def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128, (_.info128.VT (X86vzmovl _.info128.RC:$src)))), (!cast(InstrStr#rmk) _.info128.RC:$src, (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), addr:$srcAddr)>; } defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>; defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>; defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info, (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>; defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info, (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>; defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info, (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>; defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info, (v16i1 (insert_subvector (v16i1 immAllZerosV), (v4i1 (extract_subvector (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), (iPTR 0))), (iPTR 0))), (v4i1 (extract_subvector (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), (iPTR 0))), GR8, sub_8bit>; defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info, (v8i1 (extract_subvector (v16i1 (insert_subvector (v16i1 immAllZerosV), (v2i1 (extract_subvector (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), (iPTR 0))), (iPTR 0))), (iPTR 0))), (v2i1 (extract_subvector (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), (iPTR 0))), GR8, sub_8bit>; defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info, (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>; defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info, (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>; defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info, (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>; defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info, (v16i1 (insert_subvector (v16i1 immAllZerosV), (v4i1 (extract_subvector (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), (iPTR 0))), (iPTR 0))), (v4i1 (extract_subvector (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), (iPTR 0))), GR8, sub_8bit>; defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info, (v8i1 (extract_subvector (v16i1 (insert_subvector (v16i1 immAllZerosV), (v2i1 (extract_subvector (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), (iPTR 0))), (iPTR 0))), (iPTR 0))), (v2i1 (extract_subvector (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), (iPTR 0))), GR8, sub_8bit>; def : Pat<(f32 (X86selects (scalar_to_vector GR8:$mask), (f32 FR32X:$src1), (f32 FR32X:$src2))), (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X), (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM), (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>; def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))), (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X), VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>; def : Pat<(f64 (X86selects (scalar_to_vector GR8:$mask), (f64 FR64X:$src1), (f64 FR64X:$src2))), (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X), (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM), (v2f64 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>; def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))), (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X), VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>; let hasSideEffects = 0 in { def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), "vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, XS, EVEX_4V, VEX_LIG, FoldGenData<"VMOVSSZrr">, Sched<[SchedWriteFShuffle.XMM]>; let Constraints = "$src0 = $dst" in def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2), "vmovss.s\t{$src2, $src1, $dst {${mask}}|"# "$dst {${mask}}, $src1, $src2}", []>, EVEX_K, XS, EVEX_4V, VEX_LIG, FoldGenData<"VMOVSSZrrk">, Sched<[SchedWriteFShuffle.XMM]>; def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2), "vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"# "$dst {${mask}} {z}, $src1, $src2}", []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG, FoldGenData<"VMOVSSZrrkz">, Sched<[SchedWriteFShuffle.XMM]>; def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), "vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, XD, EVEX_4V, VEX_LIG, VEX_W, FoldGenData<"VMOVSDZrr">, Sched<[SchedWriteFShuffle.XMM]>; let Constraints = "$src0 = $dst" in def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2), "vmovsd.s\t{$src2, $src1, $dst {${mask}}|"# "$dst {${mask}}, $src1, $src2}", []>, EVEX_K, XD, EVEX_4V, VEX_LIG, VEX_W, FoldGenData<"VMOVSDZrrk">, Sched<[SchedWriteFShuffle.XMM]>; def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins f64x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2), "vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"# "$dst {${mask}} {z}, $src1, $src2}", []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG, VEX_W, FoldGenData<"VMOVSDZrrkz">, Sched<[SchedWriteFShuffle.XMM]>; } let Predicates = [HasAVX512] in { let AddedComplexity = 15 in { def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))), (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>; def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))), (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>; def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))), (VMOVSDZrr (v2f64 (AVX512_128_SET0)), (COPY_TO_REGCLASS FR64X:$src, VR128X))>; } // Move low f32 and clear high bits. def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))), (SUBREG_TO_REG (i32 0), (VMOVSSZrr (v4f32 (AVX512_128_SET0)), (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>; def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))), (SUBREG_TO_REG (i32 0), (VMOVSSZrr (v4i32 (AVX512_128_SET0)), (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>; def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))), (SUBREG_TO_REG (i32 0), (VMOVSSZrr (v4f32 (AVX512_128_SET0)), (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)), sub_xmm)>; def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))), (SUBREG_TO_REG (i32 0), (VMOVSSZrr (v4i32 (AVX512_128_SET0)), (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)), sub_xmm)>; let AddedComplexity = 20 in { // MOVSSrm zeros the high parts of the register; represent this // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>; def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>; def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>; def : Pat<(v4f32 (X86vzload addr:$src)), (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>; // MOVSDrm zeros the high parts of the register; represent this // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>; def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>; def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>; def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>; def : Pat<(v2f64 (X86vzload addr:$src)), (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>; // Represent the same patterns above but in the form they appear for // 256-bit types def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>; def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>; def : Pat<(v8f32 (X86vzload addr:$src)), (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>; def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>; def : Pat<(v4f64 (X86vzload addr:$src)), (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>; // Represent the same patterns above but in the form they appear for // 512-bit types def : Pat<(v16i32 (X86vzmovl (insert_subvector undef, (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>; def : Pat<(v16f32 (X86vzmovl (insert_subvector undef, (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>; def : Pat<(v16f32 (X86vzload addr:$src)), (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>; def : Pat<(v8f64 (X86vzmovl (insert_subvector undef, (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>; def : Pat<(v8f64 (X86vzload addr:$src)), (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>; } def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>; // Move low f64 and clear high bits. def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))), (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2f64 (AVX512_128_SET0)), (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>; def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))), (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2f64 (AVX512_128_SET0)), (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)), sub_xmm)>; def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))), (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)), (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>; def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))), (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)), (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)), sub_xmm)>; // Extract and store. def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))), addr:$dst), (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>; // Shuffle with VMOVSS def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)), (VMOVSSZrr (v4i32 VR128X:$src1), VR128X:$src2)>; def : Pat<(v4f32 (X86Movss VR128X:$src1, (scalar_to_vector FR32X:$src2))), (VMOVSSZrr VR128X:$src1, (COPY_TO_REGCLASS FR32X:$src2, VR128X))>; // Shuffle with VMOVSD def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)), (VMOVSDZrr VR128X:$src1, VR128X:$src2)>; def : Pat<(v2f64 (X86Movsd VR128X:$src1, (scalar_to_vector FR64X:$src2))), (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS FR64X:$src2, VR128X))>; def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)), (VMOVSDZrr VR128X:$src1, VR128X:$src2)>; def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)), (VMOVSDZrr VR128X:$src1, VR128X:$src2)>; } let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in { let AddedComplexity = 15 in def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v2i64 (X86vzmovl (v2i64 VR128X:$src))))]>, EVEX, VEX_W; } let Predicates = [HasAVX512] in { let AddedComplexity = 15 in { def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))), (VMOVDI2PDIZrr GR32:$src)>; def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))), (VMOV64toPQIZrr GR64:$src)>; def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))), (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>; def : Pat<(v8i64 (X86vzmovl (insert_subvector undef, (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))), (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>; } // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. let AddedComplexity = 20 in { def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))), (VMOVDI2PDIZrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))), (VMOVDI2PDIZrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), (VMOVDI2PDIZrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), (VMOVDI2PDIZrm addr:$src)>; def : Pat<(v4i32 (X86vzload addr:$src)), (VMOVDI2PDIZrm addr:$src)>; def : Pat<(v8i32 (X86vzload addr:$src)), (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>; def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), (VMOVQI2PQIZrm addr:$src)>; def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))), (VMOVZPQILo2PQIZrr VR128X:$src)>; def : Pat<(v2i64 (X86vzload addr:$src)), (VMOVQI2PQIZrm addr:$src)>; def : Pat<(v4i64 (X86vzload addr:$src)), (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>; } // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext. def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>; def : Pat<(v16i32 (X86vzmovl (insert_subvector undef, (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>; // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext. def : Pat<(v16i32 (X86vzload addr:$src)), (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>; def : Pat<(v8i64 (X86vzload addr:$src)), (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>; } //===----------------------------------------------------------------------===// // AVX-512 - Non-temporals //===----------------------------------------------------------------------===// def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst), (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>, EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>; let Predicates = [HasVLX] in { def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst), (ins i256mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>, EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>; def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst), (ins i128mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>, EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>; } multiclass avx512_movnt opc, string OpcodeStr, X86VectorVTInfo _, X86SchedWriteMoveLS Sched, PatFrag st_frag = alignednontemporalstore> { let SchedRW = [Sched.MR], AddedComplexity = 400 in def mr : AVX512PI, EVEX, EVEX_CD8<_.EltSize, CD8VF>; } multiclass avx512_movnt_vl opc, string OpcodeStr, AVX512VLVectorVTInfo VTInfo, X86SchedWriteMoveLSWidths Sched> { let Predicates = [HasAVX512] in defm Z : avx512_movnt, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { defm Z256 : avx512_movnt, EVEX_V256; defm Z128 : avx512_movnt, EVEX_V128; } } defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info, SchedWriteVecMoveLSNT>, PD; defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info, SchedWriteFMoveLSNT>, PD, VEX_W; defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info, SchedWriteFMoveLSNT>, PS; let Predicates = [HasAVX512], AddedComplexity = 400 in { def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst), (VMOVNTDQZmr addr:$dst, VR512:$src)>; def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst), (VMOVNTDQZmr addr:$dst, VR512:$src)>; def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst), (VMOVNTDQZmr addr:$dst, VR512:$src)>; def : Pat<(v8f64 (alignednontemporalload addr:$src)), (VMOVNTDQAZrm addr:$src)>; def : Pat<(v16f32 (alignednontemporalload addr:$src)), (VMOVNTDQAZrm addr:$src)>; def : Pat<(v8i64 (alignednontemporalload addr:$src)), (VMOVNTDQAZrm addr:$src)>; } let Predicates = [HasVLX], AddedComplexity = 400 in { def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst), (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>; def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst), (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>; def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst), (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>; def : Pat<(v4f64 (alignednontemporalload addr:$src)), (VMOVNTDQAZ256rm addr:$src)>; def : Pat<(v8f32 (alignednontemporalload addr:$src)), (VMOVNTDQAZ256rm addr:$src)>; def : Pat<(v4i64 (alignednontemporalload addr:$src)), (VMOVNTDQAZ256rm addr:$src)>; def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst), (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>; def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst), (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>; def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst), (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>; def : Pat<(v2f64 (alignednontemporalload addr:$src)), (VMOVNTDQAZ128rm addr:$src)>; def : Pat<(v4f32 (alignednontemporalload addr:$src)), (VMOVNTDQAZ128rm addr:$src)>; def : Pat<(v2i64 (alignednontemporalload addr:$src)), (VMOVNTDQAZ128rm addr:$src)>; } //===----------------------------------------------------------------------===// // AVX-512 - Integer arithmetic // multiclass avx512_binop_rm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, X86FoldableSchedWrite sched, bit IsCommutable = 0> { defm rr : AVX512_maskable, AVX512BIBase, EVEX_4V, Sched<[sched]>; defm rm : AVX512_maskable, AVX512BIBase, EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_binop_rmb opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, X86FoldableSchedWrite sched, bit IsCommutable = 0> : avx512_binop_rm { defm rmb : AVX512_maskable, AVX512BIBase, EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_binop_rm_vl opc, string OpcodeStr, SDNode OpNode, AVX512VLVectorVTInfo VTInfo, X86SchedWriteWidths sched, Predicate prd, bit IsCommutable = 0> { let Predicates = [prd] in defm Z : avx512_binop_rm, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_binop_rm, EVEX_V256; defm Z128 : avx512_binop_rm, EVEX_V128; } } multiclass avx512_binop_rmb_vl opc, string OpcodeStr, SDNode OpNode, AVX512VLVectorVTInfo VTInfo, X86SchedWriteWidths sched, Predicate prd, bit IsCommutable = 0> { let Predicates = [prd] in defm Z : avx512_binop_rmb, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_binop_rmb, EVEX_V256; defm Z128 : avx512_binop_rmb, EVEX_V128; } } multiclass avx512_binop_rm_vl_q opc, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched, Predicate prd, bit IsCommutable = 0> { defm NAME : avx512_binop_rmb_vl, VEX_W, EVEX_CD8<64, CD8VF>; } multiclass avx512_binop_rm_vl_d opc, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched, Predicate prd, bit IsCommutable = 0> { defm NAME : avx512_binop_rmb_vl, EVEX_CD8<32, CD8VF>; } multiclass avx512_binop_rm_vl_w opc, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched, Predicate prd, bit IsCommutable = 0> { defm NAME : avx512_binop_rm_vl, EVEX_CD8<16, CD8VF>, VEX_WIG; } multiclass avx512_binop_rm_vl_b opc, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched, Predicate prd, bit IsCommutable = 0> { defm NAME : avx512_binop_rm_vl, EVEX_CD8<8, CD8VF>, VEX_WIG; } multiclass avx512_binop_rm_vl_dq opc_d, bits<8> opc_q, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched, Predicate prd, bit IsCommutable = 0> { defm Q : avx512_binop_rm_vl_q; defm D : avx512_binop_rm_vl_d; } multiclass avx512_binop_rm_vl_bw opc_b, bits<8> opc_w, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched, Predicate prd, bit IsCommutable = 0> { defm W : avx512_binop_rm_vl_w; defm B : avx512_binop_rm_vl_b; } multiclass avx512_binop_rm_vl_all opc_b, bits<8> opc_w, bits<8> opc_d, bits<8> opc_q, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched, bit IsCommutable = 0> { defm NAME : avx512_binop_rm_vl_dq, avx512_binop_rm_vl_bw; } multiclass avx512_binop_rm2 opc, string OpcodeStr, X86FoldableSchedWrite sched, SDNode OpNode,X86VectorVTInfo _Src, X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct, bit IsCommutable = 0> { defm rr : AVX512_maskable, AVX512BIBase, EVEX_4V, Sched<[sched]>; defm rm : AVX512_maskable, AVX512BIBase, EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; defm rmb : AVX512_maskable, AVX512BIBase, EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add, SchedWriteVecALU, 1>; defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub, SchedWriteVecALU, 0>; defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds, SchedWriteVecALU, HasBWI, 1>; defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs, SchedWriteVecALU, HasBWI, 0>; defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus, SchedWriteVecALU, HasBWI, 1>; defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus, SchedWriteVecALU, HasBWI, 0>; defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul, SchedWritePMULLD, HasAVX512, 1>, T8PD; defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul, SchedWriteVecIMul, HasBWI, 1>; defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul, SchedWriteVecIMul, HasDQI, 1>, T8PD; defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul, HasBWI, 1>; defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul, HasBWI, 1>; defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, SchedWriteVecIMul, HasBWI, 1>, T8PD; defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg, SchedWriteVecALU, HasBWI, 1>; defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq, SchedWriteVecIMul, HasAVX512, 1>, T8PD; defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq, SchedWriteVecIMul, HasAVX512, 1>; multiclass avx512_binop_all opc, string OpcodeStr, X86SchedWriteWidths sched, AVX512VLVectorVTInfo _SrcVTInfo, AVX512VLVectorVTInfo _DstVTInfo, SDNode OpNode, Predicate prd, bit IsCommutable = 0> { let Predicates = [prd] in defm NAME#Z : avx512_binop_rm2, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; let Predicates = [HasVLX, prd] in { defm NAME#Z256 : avx512_binop_rm2, EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W; defm NAME#Z128 : avx512_binop_rm2, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W; } } defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU, avx512vl_i8_info, avx512vl_i8_info, X86multishift, HasVBMI, 0>, T8PD; multiclass avx512_packs_rmb opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _Src, X86VectorVTInfo _Dst, X86FoldableSchedWrite sched> { defm rmb : AVX512_maskable, EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>, Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_packs_rm opc, string OpcodeStr, SDNode OpNode,X86VectorVTInfo _Src, X86VectorVTInfo _Dst, X86FoldableSchedWrite sched, bit IsCommutable = 0> { defm rr : AVX512_maskable, EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>; defm rm : AVX512_maskable, EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>, Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_packs_all_i32_i16 opc, string OpcodeStr, SDNode OpNode> { let Predicates = [HasBWI] in defm NAME#Z : avx512_packs_rm, avx512_packs_rmb, EVEX_V512; let Predicates = [HasBWI, HasVLX] in { defm NAME#Z256 : avx512_packs_rm, avx512_packs_rmb, EVEX_V256; defm NAME#Z128 : avx512_packs_rm, avx512_packs_rmb, EVEX_V128; } } multiclass avx512_packs_all_i16_i8 opc, string OpcodeStr, SDNode OpNode> { let Predicates = [HasBWI] in defm NAME#Z : avx512_packs_rm, EVEX_V512, VEX_WIG; let Predicates = [HasBWI, HasVLX] in { defm NAME#Z256 : avx512_packs_rm, EVEX_V256, VEX_WIG; defm NAME#Z128 : avx512_packs_rm, EVEX_V128, VEX_WIG; } } multiclass avx512_vpmadd opc, string OpcodeStr, SDNode OpNode, AVX512VLVectorVTInfo _Src, AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> { let Predicates = [HasBWI] in defm NAME#Z : avx512_packs_rm, EVEX_V512; let Predicates = [HasBWI, HasVLX] in { defm NAME#Z256 : avx512_packs_rm, EVEX_V256; defm NAME#Z128 : avx512_packs_rm, EVEX_V128; } } defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase; defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase; defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase; defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase; defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw, avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG; defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd, avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG; defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax, SchedWriteVecALU, HasBWI, 1>, T8PD; defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax, SchedWriteVecALU, HasBWI, 1>; defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax, SchedWriteVecALU, HasAVX512, 1>, T8PD; defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax, SchedWriteVecALU, HasBWI, 1>; defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax, SchedWriteVecALU, HasBWI, 1>, T8PD; defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax, SchedWriteVecALU, HasAVX512, 1>, T8PD; defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin, SchedWriteVecALU, HasBWI, 1>, T8PD; defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin, SchedWriteVecALU, HasBWI, 1>; defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin, SchedWriteVecALU, HasAVX512, 1>, T8PD; defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin, SchedWriteVecALU, HasBWI, 1>; defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin, SchedWriteVecALU, HasBWI, 1>, T8PD; defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin, SchedWriteVecALU, HasAVX512, 1>, T8PD; // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX. let Predicates = [HasDQI, NoVLX] in { def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), (EXTRACT_SUBREG (VPMULLQZrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), sub_ymm)>; def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), (EXTRACT_SUBREG (VPMULLQZrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), sub_xmm)>; } // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX. let Predicates = [HasDQI, NoVLX] in { def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), (EXTRACT_SUBREG (VPMULLQZrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), sub_ymm)>; def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), (EXTRACT_SUBREG (VPMULLQZrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), sub_xmm)>; } multiclass avx512_min_max_lowering { def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)), (EXTRACT_SUBREG (Instr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), sub_ymm)>; def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)), (EXTRACT_SUBREG (Instr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), sub_xmm)>; } let Predicates = [HasAVX512, NoVLX] in { defm : avx512_min_max_lowering; defm : avx512_min_max_lowering; defm : avx512_min_max_lowering; defm : avx512_min_max_lowering; } //===----------------------------------------------------------------------===// // AVX-512 Logical Instructions //===----------------------------------------------------------------------===// // OpNodeMsk is the OpNode to use when element size is important. OpNode will // be set to null_frag for 32-bit elements. multiclass avx512_logic_rm opc, string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeMsk, X86FoldableSchedWrite sched, X86VectorVTInfo _, bit IsCommutable = 0> { let hasSideEffects = 0 in defm rr : AVX512_maskable_logic, AVX512BIBase, EVEX_4V, Sched<[sched]>; let hasSideEffects = 0, mayLoad = 1 in defm rm : AVX512_maskable_logic, AVX512BIBase, EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; } // OpNodeMsk is the OpNode to use where element size is important. So use // for all of the broadcast patterns. multiclass avx512_logic_rmb opc, string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeMsk, X86FoldableSchedWrite sched, X86VectorVTInfo _, bit IsCommutable = 0> : avx512_logic_rm { defm rmb : AVX512_maskable_logic, AVX512BIBase, EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_logic_rmb_vl opc, string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeMsk, X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo, bit IsCommutable = 0> { let Predicates = [HasAVX512] in defm Z : avx512_logic_rmb, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { defm Z256 : avx512_logic_rmb, EVEX_V256; defm Z128 : avx512_logic_rmb, EVEX_V128; } } multiclass avx512_logic_rm_vl_dq opc_d, bits<8> opc_q, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched, bit IsCommutable = 0> { defm Q : avx512_logic_rmb_vl, VEX_W, EVEX_CD8<64, CD8VF>; defm D : avx512_logic_rmb_vl, EVEX_CD8<32, CD8VF>; } defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and, SchedWriteVecLogic, 1>; defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or, SchedWriteVecLogic, 1>; defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, SchedWriteVecLogic, 1>; defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, SchedWriteVecLogic>; //===----------------------------------------------------------------------===// // AVX-512 FP arithmetic //===----------------------------------------------------------------------===// multiclass avx512_fp_scalar opc, string OpcodeStr,X86VectorVTInfo _, SDNode OpNode, SDNode VecNode, X86FoldableSchedWrite sched, bit IsCommutable> { let ExeDomain = _.ExeDomain in { defm rr_Int : AVX512_maskable_scalar, Sched<[sched]>; defm rm_Int : AVX512_maskable_scalar, Sched<[sched.Folded, ReadAfterLd]>; let isCodeGenOnly = 1, Predicates = [HasAVX512] in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, Sched<[sched]> { let isCommutable = IsCommutable; } def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), (ins _.FRC:$src1, _.ScalarMemOp:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2)))]>, Sched<[sched.Folded, ReadAfterLd]>; } } } multiclass avx512_fp_scalar_round opc, string OpcodeStr,X86VectorVTInfo _, SDNode VecNode, X86FoldableSchedWrite sched, bit IsCommutable = 0> { let ExeDomain = _.ExeDomain in defm rrb_Int : AVX512_maskable_scalar, EVEX_B, EVEX_RC, Sched<[sched]>; } multiclass avx512_fp_scalar_sae opc, string OpcodeStr,X86VectorVTInfo _, SDNode OpNode, SDNode VecNode, SDNode SaeNode, X86FoldableSchedWrite sched, bit IsCommutable> { let ExeDomain = _.ExeDomain in { defm rr_Int : AVX512_maskable_scalar, Sched<[sched]>; defm rm_Int : AVX512_maskable_scalar, Sched<[sched.Folded, ReadAfterLd]>; let isCodeGenOnly = 1, Predicates = [HasAVX512] in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, Sched<[sched]> { let isCommutable = IsCommutable; } def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), (ins _.FRC:$src1, _.ScalarMemOp:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2)))]>, Sched<[sched.Folded, ReadAfterLd]>; } defm rrb_Int : AVX512_maskable_scalar, EVEX_B, Sched<[sched]>; } } multiclass avx512_binop_s_round opc, string OpcodeStr, SDNode OpNode, SDNode VecNode, X86SchedWriteSizes sched, bit IsCommutable> { defm SSZ : avx512_fp_scalar, avx512_fp_scalar_round, XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm SDZ : avx512_fp_scalar, avx512_fp_scalar_round, XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; } multiclass avx512_binop_s_sae opc, string OpcodeStr, SDNode OpNode, SDNode VecNode, SDNode SaeNode, X86SchedWriteSizes sched, bit IsCommutable> { defm SSZ : avx512_fp_scalar_sae, XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm SDZ : avx512_fp_scalar_sae, XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; } defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds, SchedWriteFAddSizes, 1>; defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds, SchedWriteFMulSizes, 1>; defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds, SchedWriteFAddSizes, 0>; defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds, SchedWriteFDivSizes, 0>; defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminRnds, SchedWriteFCmpSizes, 0>; defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds, SchedWriteFCmpSizes, 0>; // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use // X86fminc and X86fmaxc instead of X86fmin and X86fmax multiclass avx512_comutable_binop_s opc, string OpcodeStr, X86VectorVTInfo _, SDNode OpNode, X86FoldableSchedWrite sched> { let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, Sched<[sched]> { let isCommutable = 1; } def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), (ins _.FRC:$src1, _.ScalarMemOp:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2)))]>, Sched<[sched.Folded, ReadAfterLd]>; } } defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc, SchedWriteFCmp.Scl>, XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc, SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc, SchedWriteFCmp.Scl>, XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc, SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; multiclass avx512_fp_packed opc, string OpcodeStr, SDPatternOperator OpNode, X86VectorVTInfo _, X86FoldableSchedWrite sched, bit IsCommutable> { let ExeDomain = _.ExeDomain, hasSideEffects = 0 in { defm rr: AVX512_maskable, EVEX_4V, Sched<[sched]>; let mayLoad = 1 in { defm rm: AVX512_maskable, EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; defm rmb: AVX512_maskable, EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } } } multiclass avx512_fp_round_packed opc, string OpcodeStr, SDPatternOperator OpNodeRnd, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm rrb: AVX512_maskable, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>; } multiclass avx512_fp_sae_packed opc, string OpcodeStr, SDPatternOperator OpNodeRnd, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm rrb: AVX512_maskable, EVEX_4V, EVEX_B, Sched<[sched]>; } multiclass avx512_fp_binop_p opc, string OpcodeStr, SDPatternOperator OpNode, Predicate prd, X86SchedWriteSizes sched, bit IsCommutable = 0> { let Predicates = [prd] in { defm PSZ : avx512_fp_packed, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; defm PDZ : avx512_fp_packed, EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; } // Define only if AVX512VL feature is present. let Predicates = [prd, HasVLX] in { defm PSZ128 : avx512_fp_packed, EVEX_V128, PS, EVEX_CD8<32, CD8VF>; defm PSZ256 : avx512_fp_packed, EVEX_V256, PS, EVEX_CD8<32, CD8VF>; defm PDZ128 : avx512_fp_packed, EVEX_V128, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm PDZ256 : avx512_fp_packed, EVEX_V256, PD, VEX_W, EVEX_CD8<64, CD8VF>; } } multiclass avx512_fp_binop_p_round opc, string OpcodeStr, SDNode OpNodeRnd, X86SchedWriteSizes sched> { defm PSZ : avx512_fp_round_packed, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; defm PDZ : avx512_fp_round_packed, EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; } multiclass avx512_fp_binop_p_sae opc, string OpcodeStr, SDNode OpNodeRnd, X86SchedWriteSizes sched> { defm PSZ : avx512_fp_sae_packed, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; defm PDZ : avx512_fp_sae_packed, EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; } defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512, SchedWriteFAddSizes, 1>, avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>; defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512, SchedWriteFMulSizes, 1>, avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>; defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, SchedWriteFAddSizes>, avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>; defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, SchedWriteFDivSizes>, avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>; defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, SchedWriteFCmpSizes, 0>, avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SchedWriteFCmpSizes>; defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512, SchedWriteFCmpSizes, 0>, avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SchedWriteFCmpSizes>; let isCodeGenOnly = 1 in { defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, SchedWriteFCmpSizes, 1>; defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, SchedWriteFCmpSizes, 1>; } defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI, SchedWriteFLogicSizes, 1>; defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI, SchedWriteFLogicSizes, 0>; defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI, SchedWriteFLogicSizes, 1>; defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI, SchedWriteFLogicSizes, 1>; // Patterns catch floating point selects with bitcasted integer logic ops. multiclass avx512_fp_logical_lowering { let Predicates = [prd] in { // Masked register-register logical operations. def : Pat<(_.VT (vselect _.KRCWM:$mask, (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))), _.RC:$src0)), (!cast(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))), _.ImmAllZerosV)), (!cast(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1, _.RC:$src2)>; // Masked register-memory logical operations. def : Pat<(_.VT (vselect _.KRCWM:$mask, (bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))), _.RC:$src0)), (!cast(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, addr:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))), _.ImmAllZerosV)), (!cast(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1, addr:$src2)>; // Register-broadcast logical operations. def : Pat<(_.i64VT (OpNode _.RC:$src1, (bitconvert (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2)))))), (!cast(InstrStr#rmb) _.RC:$src1, addr:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (bitconvert (_.i64VT (OpNode _.RC:$src1, (bitconvert (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2))))))), _.RC:$src0)), (!cast(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, addr:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (bitconvert (_.i64VT (OpNode _.RC:$src1, (bitconvert (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2))))))), _.ImmAllZerosV)), (!cast(InstrStr#rmbkz) _.KRCWM:$mask, _.RC:$src1, addr:$src2)>; } } multiclass avx512_fp_logical_lowering_sizes { defm : avx512_fp_logical_lowering; defm : avx512_fp_logical_lowering; defm : avx512_fp_logical_lowering; defm : avx512_fp_logical_lowering; defm : avx512_fp_logical_lowering; defm : avx512_fp_logical_lowering; } defm : avx512_fp_logical_lowering_sizes<"VPAND", and>; defm : avx512_fp_logical_lowering_sizes<"VPOR", or>; defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>; defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>; let Predicates = [HasVLX,HasDQI] in { // Use packed logical operations for scalar ops. def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)), (COPY_TO_REGCLASS (VANDPDZ128rr (COPY_TO_REGCLASS FR64X:$src1, VR128X), (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>; def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)), (COPY_TO_REGCLASS (VORPDZ128rr (COPY_TO_REGCLASS FR64X:$src1, VR128X), (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>; def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)), (COPY_TO_REGCLASS (VXORPDZ128rr (COPY_TO_REGCLASS FR64X:$src1, VR128X), (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>; def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)), (COPY_TO_REGCLASS (VANDNPDZ128rr (COPY_TO_REGCLASS FR64X:$src1, VR128X), (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>; def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)), (COPY_TO_REGCLASS (VANDPSZ128rr (COPY_TO_REGCLASS FR32X:$src1, VR128X), (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>; def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)), (COPY_TO_REGCLASS (VORPSZ128rr (COPY_TO_REGCLASS FR32X:$src1, VR128X), (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>; def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)), (COPY_TO_REGCLASS (VXORPSZ128rr (COPY_TO_REGCLASS FR32X:$src1, VR128X), (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>; def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)), (COPY_TO_REGCLASS (VANDNPSZ128rr (COPY_TO_REGCLASS FR32X:$src1, VR128X), (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>; } multiclass avx512_fp_scalef_p opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rr: AVX512_maskable, EVEX_4V, Sched<[sched]>; defm rm: AVX512_maskable, EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; defm rmb: AVX512_maskable, EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_fp_scalef_scalar opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rr: AVX512_maskable_scalar, Sched<[sched]>; defm rm: AVX512_maskable_scalar, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_fp_scalef_all opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode, SDNode OpNodeScal, X86SchedWriteWidths sched> { defm PSZ : avx512_fp_scalef_p, avx512_fp_round_packed, EVEX_V512, EVEX_CD8<32, CD8VF>; defm PDZ : avx512_fp_scalef_p, avx512_fp_round_packed, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; defm SSZ128 : avx512_fp_scalef_scalar, avx512_fp_scalar_round, EVEX_4V,EVEX_CD8<32, CD8VT1>; defm SDZ128 : avx512_fp_scalef_scalar, avx512_fp_scalar_round, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { defm PSZ128 : avx512_fp_scalef_p, EVEX_V128, EVEX_CD8<32, CD8VF>; defm PSZ256 : avx512_fp_scalef_p, EVEX_V256, EVEX_CD8<32, CD8VF>; defm PDZ128 : avx512_fp_scalef_p, EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; defm PDZ256 : avx512_fp_scalef_p, EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; } } defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs, SchedWriteFAdd>, T8PD; //===----------------------------------------------------------------------===// // AVX-512 VPTESTM instructions //===----------------------------------------------------------------------===// multiclass avx512_vptest opc, string OpcodeStr, PatFrag OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, string Name> { let ExeDomain = _.ExeDomain in { let isCommutable = 1 in defm rr : AVX512_maskable_cmp, EVEX_4V, Sched<[sched]>; defm rm : AVX512_maskable_cmp, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, ReadAfterLd]>; } // Patterns for compare with 0 that just use the same source twice. def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)), (_.KVT (!cast(Name # _.ZSuffix # "rr") _.RC:$src, _.RC:$src))>; def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))), (_.KVT (!cast(Name # _.ZSuffix # "rrk") _.KRC:$mask, _.RC:$src, _.RC:$src))>; } multiclass avx512_vptest_mb opc, string OpcodeStr, PatFrag OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm rmb : AVX512_maskable_cmp, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, ReadAfterLd]>; } // Use 512bit version to implement 128/256 bit in case NoVLX. multiclass avx512_vptest_lowering { def : Pat<(_.KVT (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))), _.ImmAllZerosV)), (_.KVT (COPY_TO_REGCLASS (!cast(Name # "Zrr") (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), _.RC:$src1, _.SubRegIdx), (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), _.RC:$src2, _.SubRegIdx)), _.KRC))>; def : Pat<(_.KVT (and _.KRC:$mask, (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))), _.ImmAllZerosV))), (COPY_TO_REGCLASS (!cast(Name # "Zrrk") (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC), (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), _.RC:$src1, _.SubRegIdx), (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), _.RC:$src2, _.SubRegIdx)), _.KRC)>; def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)), (_.KVT (COPY_TO_REGCLASS (!cast(Name # "Zrr") (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), _.RC:$src, _.SubRegIdx), (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), _.RC:$src, _.SubRegIdx)), _.KRC))>; def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))), (COPY_TO_REGCLASS (!cast(Name # "Zrrk") (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC), (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), _.RC:$src, _.SubRegIdx), (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), _.RC:$src, _.SubRegIdx)), _.KRC)>; } multiclass avx512_vptest_dq_sizes opc, string OpcodeStr, PatFrag OpNode, X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { let Predicates = [HasAVX512] in defm Z : avx512_vptest, avx512_vptest_mb, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { defm Z256 : avx512_vptest, avx512_vptest_mb, EVEX_V256; defm Z128 : avx512_vptest, avx512_vptest_mb, EVEX_V128; } let Predicates = [HasAVX512, NoVLX] in { defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, NAME>; defm Z128_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info128, NAME>; } } multiclass avx512_vptest_dq opc, string OpcodeStr, PatFrag OpNode, X86SchedWriteWidths sched> { defm D : avx512_vptest_dq_sizes; defm Q : avx512_vptest_dq_sizes, VEX_W; } multiclass avx512_vptest_wb opc, string OpcodeStr, PatFrag OpNode, X86SchedWriteWidths sched> { let Predicates = [HasBWI] in { defm WZ: avx512_vptest, EVEX_V512, VEX_W; defm BZ: avx512_vptest, EVEX_V512; } let Predicates = [HasVLX, HasBWI] in { defm WZ256: avx512_vptest, EVEX_V256, VEX_W; defm WZ128: avx512_vptest, EVEX_V128, VEX_W; defm BZ256: avx512_vptest, EVEX_V256; defm BZ128: avx512_vptest, EVEX_V128; } let Predicates = [HasAVX512, NoVLX] in { defm BZ256_Alt : avx512_vptest_lowering; defm BZ128_Alt : avx512_vptest_lowering; defm WZ256_Alt : avx512_vptest_lowering; defm WZ128_Alt : avx512_vptest_lowering; } } // These patterns are used to match vptestm/vptestnm. We don't treat pcmpeqm // as commutable here because we already canonicalized all zeros vectors to the // RHS during lowering. def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2), (X86cmpm node:$src1, node:$src2, (i8 0))>; def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2), (X86cmpm node:$src1, node:$src2, (i8 4))>; multiclass avx512_vptest_all_forms opc_wb, bits<8> opc_dq, string OpcodeStr, PatFrag OpNode, X86SchedWriteWidths sched> : avx512_vptest_wb, avx512_vptest_dq; defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem, SchedWriteVecLogic>, T8PD; defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm, SchedWriteVecLogic>, T8XS; //===----------------------------------------------------------------------===// // AVX-512 Shift instructions //===----------------------------------------------------------------------===// multiclass avx512_shift_rmi opc, Format ImmFormR, Format ImmFormM, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm ri : AVX512_maskable, Sched<[sched]>; defm mi : AVX512_maskable, Sched<[sched.Folded]>; } } multiclass avx512_shift_rmbi opc, Format ImmFormM, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm mbi : AVX512_maskable, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_shift_rrm opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> { // src2 is always 128-bit let ExeDomain = _.ExeDomain in { defm rr : AVX512_maskable, AVX512BIBase, EVEX_4V, Sched<[sched]>; defm rm : AVX512_maskable, AVX512BIBase, EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_shift_sizes opc, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched, ValueType SrcVT, PatFrag bc_frag, AVX512VLVectorVTInfo VTInfo, Predicate prd> { let Predicates = [prd] in defm Z : avx512_shift_rrm, EVEX_V512, EVEX_CD8 ; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_shift_rrm, EVEX_V256, EVEX_CD8; defm Z128 : avx512_shift_rrm, EVEX_V128, EVEX_CD8; } } multiclass avx512_shift_types opcd, bits<8> opcq, bits<8> opcw, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched> { defm D : avx512_shift_sizes; defm Q : avx512_shift_sizes, VEX_W; defm W : avx512_shift_sizes; } multiclass avx512_shift_rmi_sizes opc, Format ImmFormR, Format ImmFormM, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> { let Predicates = [HasAVX512] in defm Z: avx512_shift_rmi, avx512_shift_rmbi, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { defm Z256: avx512_shift_rmi, avx512_shift_rmbi, EVEX_V256; defm Z128: avx512_shift_rmi, avx512_shift_rmbi, EVEX_V128; } } multiclass avx512_shift_rmi_w opcw, Format ImmFormR, Format ImmFormM, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched> { let Predicates = [HasBWI] in defm WZ: avx512_shift_rmi, EVEX_V512, VEX_WIG; let Predicates = [HasVLX, HasBWI] in { defm WZ256: avx512_shift_rmi, EVEX_V256, VEX_WIG; defm WZ128: avx512_shift_rmi, EVEX_V128, VEX_WIG; } } multiclass avx512_shift_rmi_dq opcd, bits<8> opcq, Format ImmFormR, Format ImmFormM, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched> { defm D: avx512_shift_rmi_sizes, EVEX_CD8<32, CD8VF>; defm Q: avx512_shift_rmi_sizes, EVEX_CD8<64, CD8VF>, VEX_W; } defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli, SchedWriteVecShiftImm>, avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli, SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli, SchedWriteVecShiftImm>, avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli, SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai, SchedWriteVecShiftImm>, avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai, SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri, SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli, SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, SchedWriteVecShift>; defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra, SchedWriteVecShift>; defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl, SchedWriteVecShift>; // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX. let Predicates = [HasAVX512, NoVLX] in { def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))), (EXTRACT_SUBREG (v8i64 (VPSRAQZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), VR128X:$src2)), sub_ymm)>; def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), (EXTRACT_SUBREG (v8i64 (VPSRAQZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), VR128X:$src2)), sub_xmm)>; def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))), (EXTRACT_SUBREG (v8i64 (VPSRAQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), imm:$src2)), sub_ymm)>; def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))), (EXTRACT_SUBREG (v8i64 (VPSRAQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), imm:$src2)), sub_xmm)>; } //===-------------------------------------------------------------------===// // Variable Bit Shifts //===-------------------------------------------------------------------===// multiclass avx512_var_shift opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rr : AVX512_maskable, AVX5128IBase, EVEX_4V, Sched<[sched]>; defm rm : AVX512_maskable, AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_var_shift_mb opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm rmb : AVX512_maskable, AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_var_shift_sizes opc, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { let Predicates = [HasAVX512] in defm Z : avx512_var_shift, avx512_var_shift_mb, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { defm Z256 : avx512_var_shift, avx512_var_shift_mb, EVEX_V256; defm Z128 : avx512_var_shift, avx512_var_shift_mb, EVEX_V128; } } multiclass avx512_var_shift_types opc, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched> { defm D : avx512_var_shift_sizes; defm Q : avx512_var_shift_sizes, VEX_W; } // Use 512bit version to implement 128/256 bit in case NoVLX. multiclass avx512_var_shift_lowering p> { let Predicates = p in { def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1), (_.info256.VT _.info256.RC:$src2))), (EXTRACT_SUBREG (!cast(OpcodeStr#"Zrr") (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), sub_ymm)>; def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1), (_.info128.VT _.info128.RC:$src2))), (EXTRACT_SUBREG (!cast(OpcodeStr#"Zrr") (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), sub_xmm)>; } } multiclass avx512_var_shift_w opc, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched> { let Predicates = [HasBWI] in defm WZ: avx512_var_shift, EVEX_V512, VEX_W; let Predicates = [HasVLX, HasBWI] in { defm WZ256: avx512_var_shift, EVEX_V256, VEX_W; defm WZ128: avx512_var_shift, EVEX_V128, VEX_W; } } defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SchedWriteVarVecShift>, avx512_var_shift_w<0x12, "vpsllvw", shl, SchedWriteVarVecShift>; defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SchedWriteVarVecShift>, avx512_var_shift_w<0x11, "vpsravw", sra, SchedWriteVarVecShift>; defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SchedWriteVarVecShift>, avx512_var_shift_w<0x10, "vpsrlvw", srl, SchedWriteVarVecShift>; defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>; defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>; defm : avx512_var_shift_lowering; defm : avx512_var_shift_lowering; defm : avx512_var_shift_lowering; defm : avx512_var_shift_lowering; // Special handing for handling VPSRAV intrinsics. multiclass avx512_var_shift_int_lowering p> { let Predicates = p in { def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)), (!cast(InstrStr#_.ZSuffix#rr) _.RC:$src1, _.RC:$src2)>; def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))), (!cast(InstrStr#_.ZSuffix##rm) _.RC:$src1, addr:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)), (!cast(InstrStr#_.ZSuffix#rrk) _.RC:$src0, _.KRC:$mask, _.RC:$src1, _.RC:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))), _.RC:$src0)), (!cast(InstrStr#_.ZSuffix##rmk) _.RC:$src0, _.KRC:$mask, _.RC:$src1, addr:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)), (!cast(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask, _.RC:$src1, _.RC:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))), _.ImmAllZerosV)), (!cast(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask, _.RC:$src1, addr:$src2)>; } } multiclass avx512_var_shift_int_lowering_mb p> : avx512_var_shift_int_lowering { let Predicates = p in { def : Pat<(_.VT (X86vsrav _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src2)))), (!cast(InstrStr#_.ZSuffix##rmb) _.RC:$src1, addr:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (X86vsrav _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src2))), _.RC:$src0)), (!cast(InstrStr#_.ZSuffix##rmbk) _.RC:$src0, _.KRC:$mask, _.RC:$src1, addr:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (X86vsrav _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src2))), _.ImmAllZerosV)), (!cast(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask, _.RC:$src1, addr:$src2)>; } } defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>; defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>; defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>; defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>; defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>; defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>; defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>; defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>; defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>; // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. let Predicates = [HasAVX512, NoVLX] in { def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), (EXTRACT_SUBREG (v8i64 (VPROLVQZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), sub_xmm)>; def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), (EXTRACT_SUBREG (v8i64 (VPROLVQZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), sub_ymm)>; def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))), (EXTRACT_SUBREG (v16i32 (VPROLVDZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), sub_xmm)>; def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), (EXTRACT_SUBREG (v16i32 (VPROLVDZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), sub_ymm)>; def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))), (EXTRACT_SUBREG (v8i64 (VPROLQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), imm:$src2)), sub_xmm)>; def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))), (EXTRACT_SUBREG (v8i64 (VPROLQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), imm:$src2)), sub_ymm)>; def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))), (EXTRACT_SUBREG (v16i32 (VPROLDZri (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), imm:$src2)), sub_xmm)>; def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))), (EXTRACT_SUBREG (v16i32 (VPROLDZri (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), imm:$src2)), sub_ymm)>; } // Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. let Predicates = [HasAVX512, NoVLX] in { def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), (EXTRACT_SUBREG (v8i64 (VPRORVQZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), sub_xmm)>; def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), (EXTRACT_SUBREG (v8i64 (VPRORVQZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), sub_ymm)>; def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))), (EXTRACT_SUBREG (v16i32 (VPRORVDZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), sub_xmm)>; def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), (EXTRACT_SUBREG (v16i32 (VPRORVDZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), sub_ymm)>; def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))), (EXTRACT_SUBREG (v8i64 (VPRORQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), imm:$src2)), sub_xmm)>; def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))), (EXTRACT_SUBREG (v8i64 (VPRORQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), imm:$src2)), sub_ymm)>; def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))), (EXTRACT_SUBREG (v16i32 (VPRORDZri (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), imm:$src2)), sub_xmm)>; def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))), (EXTRACT_SUBREG (v16i32 (VPRORDZri (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), imm:$src2)), sub_ymm)>; } //===-------------------------------------------------------------------===// // 1-src variable permutation VPERMW/D/Q //===-------------------------------------------------------------------===// multiclass avx512_vperm_dq_sizes opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { let Predicates = [HasAVX512] in defm Z : avx512_var_shift, avx512_var_shift_mb, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in defm Z256 : avx512_var_shift, avx512_var_shift_mb, EVEX_V256; } multiclass avx512_vpermi_dq_sizes opc, Format ImmFormR, Format ImmFormM, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> { let Predicates = [HasAVX512] in defm Z: avx512_shift_rmi, avx512_shift_rmbi, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in defm Z256: avx512_shift_rmi, avx512_shift_rmbi, EVEX_V256; } multiclass avx512_vperm_bw opc, string OpcodeStr, Predicate prd, SDNode OpNode, X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { let Predicates = [prd] in defm Z: avx512_var_shift, EVEX_V512 ; let Predicates = [HasVLX, prd] in { defm Z256: avx512_var_shift, EVEX_V256 ; defm Z128: avx512_var_shift, EVEX_V128 ; } } defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv, WriteVarShuffle256, avx512vl_i16_info>, VEX_W; defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv, WriteVarShuffle256, avx512vl_i8_info>; defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv, WriteVarShuffle256, avx512vl_i32_info>; defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv, WriteVarShuffle256, avx512vl_i64_info>, VEX_W; defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv, WriteFVarShuffle256, avx512vl_f32_info>; defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv, WriteFVarShuffle256, avx512vl_f64_info>, VEX_W; defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq", X86VPermi, WriteShuffle256, avx512vl_i64_info>, EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd", X86VPermi, WriteFShuffle256, avx512vl_f64_info>, EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; //===----------------------------------------------------------------------===// // AVX-512 - VPERMIL //===----------------------------------------------------------------------===// multiclass avx512_permil_vec OpcVar, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, X86VectorVTInfo Ctrl> { defm rr: AVX512_maskable, T8PD, EVEX_4V, Sched<[sched]>; defm rm: AVX512_maskable, T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, ReadAfterLd]>; defm rmb: AVX512_maskable, T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_permil_vec_common OpcVar, X86SchedWriteWidths sched, AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl> { let Predicates = [HasAVX512] in { defm Z : avx512_permil_vec, EVEX_V512; } let Predicates = [HasAVX512, HasVLX] in { defm Z128 : avx512_permil_vec, EVEX_V128; defm Z256 : avx512_permil_vec, EVEX_V256; } } multiclass avx512_permil OpcImm, bits<8> OpcVar, AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{ defm NAME: avx512_permil_vec_common; defm NAME: avx512_shift_rmi_sizes, EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>; } let ExeDomain = SSEPackedSingle in defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info, avx512vl_i32_info>; let ExeDomain = SSEPackedDouble in defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info, avx512vl_i64_info>, VEX_W; //===----------------------------------------------------------------------===// // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW //===----------------------------------------------------------------------===// defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd", X86PShufd, SchedWriteShuffle, avx512vl_i32_info>, EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>; defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw", X86PShufhw, SchedWriteShuffle>, EVEX, AVX512XSIi8Base; defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw", X86PShuflw, SchedWriteShuffle>, EVEX, AVX512XDIi8Base; //===----------------------------------------------------------------------===// // AVX-512 - VPSHUFB //===----------------------------------------------------------------------===// multiclass avx512_pshufb_sizes opc, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched> { let Predicates = [HasBWI] in defm Z: avx512_var_shift, EVEX_V512; let Predicates = [HasVLX, HasBWI] in { defm Z256: avx512_var_shift, EVEX_V256; defm Z128: avx512_var_shift, EVEX_V128; } } defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb, SchedWriteVarShuffle>, VEX_WIG; //===----------------------------------------------------------------------===// // Move Low to High and High to Low packed FP Instructions //===----------------------------------------------------------------------===// def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>, Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V; def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>, Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V; //===----------------------------------------------------------------------===// // VMOVHPS/PD VMOVLPS Instructions // All patterns was taken from SSS implementation. //===----------------------------------------------------------------------===// multiclass avx512_mov_hilo_packed opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in def rm : AVX512, Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>, EVEX_4V; } defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps, v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS; defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl, v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W; defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", X86Movlps, v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS; defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movlpd, v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W; let Predicates = [HasAVX512] in { // VMOVHPS patterns def : Pat<(X86Movlhps VR128X:$src1, (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))), (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>; def : Pat<(X86Movlhps VR128X:$src1, (bc_v4f32 (v2i64 (X86vzload addr:$src2)))), (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>; // VMOVHPD patterns def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))), (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; // VMOVLPS patterns def : Pat<(v4f32 (X86Movlps VR128X:$src1, (load addr:$src2))), (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>; // VMOVLPD patterns def : Pat<(v2f64 (X86Movlpd VR128X:$src1, (load addr:$src2))), (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>; def : Pat<(v2f64 (X86Movsd VR128X:$src1, (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>; } let SchedRW = [WriteFStore] in { def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128X:$src), "vmovhps\t{$src, $dst|$dst, $src}", [(store (f64 (extractelt (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)), (bc_v2f64 (v4f32 VR128X:$src))), (iPTR 0))), addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT2>; def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128X:$src), "vmovhpd\t{$src, $dst|$dst, $src}", [(store (f64 (extractelt (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)), (iPTR 0))), addr:$dst)]>, EVEX, EVEX_CD8<64, CD8VT1>, VEX_W; def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128X:$src), "vmovlps\t{$src, $dst|$dst, $src}", [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)), (iPTR 0))), addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT2>; def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128X:$src), "vmovlpd\t{$src, $dst|$dst, $src}", [(store (f64 (extractelt (v2f64 VR128X:$src), (iPTR 0))), addr:$dst)]>, EVEX, EVEX_CD8<64, CD8VT1>, VEX_W; } // SchedRW let Predicates = [HasAVX512] in { // VMOVHPD patterns def : Pat<(store (f64 (extractelt (v2f64 (X86VPermilpi VR128X:$src, (i8 1))), (iPTR 0))), addr:$dst), (VMOVHPDZ128mr addr:$dst, VR128X:$src)>; // VMOVLPS patterns def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128X:$src2)), addr:$src1), (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>; // VMOVLPD patterns def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128X:$src2)), addr:$src1), (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>; } //===----------------------------------------------------------------------===// // FMA - Fused Multiply Operations // multiclass avx512_fma3p_213_rm opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, string Suff> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in { defm r: AVX512_maskable_3src, AVX512FMA3Base, Sched<[sched]>; defm m: AVX512_maskable_3src, AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>; defm mb: AVX512_maskable_3src, AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_fma3_213_round opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, string Suff> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in defm rb: AVX512_maskable_3src, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>; } multiclass avx512_fma3p_213_common opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, X86SchedWriteWidths sched, AVX512VLVectorVTInfo _, string Suff> { let Predicates = [HasAVX512] in { defm Z : avx512_fma3p_213_rm, avx512_fma3_213_round, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; } let Predicates = [HasVLX, HasAVX512] in { defm Z256 : avx512_fma3p_213_rm, EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; defm Z128 : avx512_fma3p_213_rm, EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; } } multiclass avx512_fma3p_213_f opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> { defm PS : avx512_fma3p_213_common; defm PD : avx512_fma3p_213_common, VEX_W; } defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>; defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>; defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>; defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>; defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>; defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>; multiclass avx512_fma3p_231_rm opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, string Suff> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in { defm r: AVX512_maskable_3src, AVX512FMA3Base, Sched<[sched]>; defm m: AVX512_maskable_3src, AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>; defm mb: AVX512_maskable_3src, AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_fma3_231_round opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, string Suff> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in defm rb: AVX512_maskable_3src, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>; } multiclass avx512_fma3p_231_common opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, X86SchedWriteWidths sched, AVX512VLVectorVTInfo _, string Suff> { let Predicates = [HasAVX512] in { defm Z : avx512_fma3p_231_rm, avx512_fma3_231_round, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; } let Predicates = [HasVLX, HasAVX512] in { defm Z256 : avx512_fma3p_231_rm, EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; defm Z128 : avx512_fma3p_231_rm, EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; } } multiclass avx512_fma3p_231_f opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd > { defm PS : avx512_fma3p_231_common; defm PD : avx512_fma3p_231_common, VEX_W; } defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>; defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>; defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>; defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>; defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>; defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>; multiclass avx512_fma3p_132_rm opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, string Suff> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in { defm r: AVX512_maskable_3src, AVX512FMA3Base, Sched<[sched]>; // Pattern is 312 order so that the load is in a different place from the // 213 and 231 patterns this helps tablegen's duplicate pattern detection. defm m: AVX512_maskable_3src, AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>; // Pattern is 312 order so that the load is in a different place from the // 213 and 231 patterns this helps tablegen's duplicate pattern detection. defm mb: AVX512_maskable_3src, AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_fma3_132_round opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, string Suff> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in defm rb: AVX512_maskable_3src, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>; } multiclass avx512_fma3p_132_common opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, X86SchedWriteWidths sched, AVX512VLVectorVTInfo _, string Suff> { let Predicates = [HasAVX512] in { defm Z : avx512_fma3p_132_rm, avx512_fma3_132_round, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; } let Predicates = [HasVLX, HasAVX512] in { defm Z256 : avx512_fma3p_132_rm, EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; defm Z128 : avx512_fma3p_132_rm, EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; } } multiclass avx512_fma3p_132_f opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd > { defm PS : avx512_fma3p_132_common; defm PD : avx512_fma3p_132_common, VEX_W; } defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>; defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>; defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>; defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>; defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>; defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>; // Scalar FMA multiclass avx512_fma3s_common opc, string OpcodeStr, X86VectorVTInfo _, dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb, dag RHS_r, dag RHS_m, bit MaskOnlyReg> { let Constraints = "$src1 = $dst", hasSideEffects = 0 in { defm r_Int: AVX512_maskable_3src_scalar, AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>; defm m_Int: AVX512_maskable_3src_scalar, AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>; defm rb_Int: AVX512_maskable_3src_scalar, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>; let isCodeGenOnly = 1, isCommutable = 1 in { def r : AVX512FMA3S, Sched<[SchedWriteFMA.Scl]>; def m : AVX512FMA3S, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>; }// isCodeGenOnly = 1 }// Constraints = "$src1 = $dst" } multiclass avx512_fma3s_all opc213, bits<8> opc231, bits<8> opc132, string OpcodeStr, SDNode OpNode, SDNode OpNodes1, SDNode OpNodeRnds1, SDNode OpNodes3, SDNode OpNodeRnds3, X86VectorVTInfo _, string SUFF> { let ExeDomain = _.ExeDomain in { defm NAME#213#SUFF#Z: avx512_fma3s_common; defm NAME#231#SUFF#Z: avx512_fma3s_common; // One pattern is 312 order so that the load is in a different place from the // 213 and 231 patterns this helps tablegen's duplicate pattern detection. defm NAME#132#SUFF#Z: avx512_fma3s_common; } } multiclass avx512_fma3s opc213, bits<8> opc231, bits<8> opc132, string OpcodeStr, SDNode OpNode, SDNode OpNodes1, SDNode OpNodeRnds1, SDNode OpNodes3, SDNode OpNodeRnds3> { let Predicates = [HasAVX512] in { defm NAME : avx512_fma3s_all, EVEX_CD8<32, CD8VT1>, VEX_LIG; defm NAME : avx512_fma3s_all, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W; } } defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86Fmadds1, X86FmaddRnds1, X86Fmadds3, X86FmaddRnds3>; defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86Fmsubs1, X86FmsubRnds1, X86Fmsubs3, X86FmsubRnds3>; defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86Fnmadds1, X86FnmaddRnds1, X86Fnmadds3, X86FnmaddRnds3>; defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86Fnmsubs1, X86FnmsubRnds1, X86Fnmsubs3, X86FnmsubRnds3>; multiclass avx512_scalar_fma_patterns { let Predicates = [HasAVX512] in { def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector (Op _.FRC:$src2, (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), _.FRC:$src3))))), (!cast(Prefix#"213"#Suffix#"Zr_Int") VR128X:$src1, (COPY_TO_REGCLASS _.FRC:$src2, VR128X), (COPY_TO_REGCLASS _.FRC:$src3, VR128X))>; def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector (X86selects VK1WM:$mask, (Op _.FRC:$src2, (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), _.FRC:$src3), (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), (!cast(Prefix#"213"#Suffix#"Zr_Intk") VR128X:$src1, VK1WM:$mask, (COPY_TO_REGCLASS _.FRC:$src2, VR128X), (COPY_TO_REGCLASS _.FRC:$src3, VR128X))>; def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector (X86selects VK1WM:$mask, (Op _.FRC:$src2, _.FRC:$src3, (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), (!cast(Prefix#"231"#Suffix#"Zr_Intk") VR128X:$src1, VK1WM:$mask, (COPY_TO_REGCLASS _.FRC:$src2, VR128X), (COPY_TO_REGCLASS _.FRC:$src3, VR128X))>; def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector (X86selects VK1WM:$mask, (Op _.FRC:$src2, (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), _.FRC:$src3), (_.EltVT ZeroFP)))))), (!cast(Prefix#"213"#Suffix#"Zr_Intkz") VR128X:$src1, VK1WM:$mask, (COPY_TO_REGCLASS _.FRC:$src2, VR128X), (COPY_TO_REGCLASS _.FRC:$src3, VR128X))>; } } defm : avx512_scalar_fma_patterns; defm : avx512_scalar_fma_patterns; defm : avx512_scalar_fma_patterns; defm : avx512_scalar_fma_patterns; defm : avx512_scalar_fma_patterns; defm : avx512_scalar_fma_patterns; defm : avx512_scalar_fma_patterns; defm : avx512_scalar_fma_patterns; //===----------------------------------------------------------------------===// // AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA //===----------------------------------------------------------------------===// let Constraints = "$src1 = $dst" in { multiclass avx512_pmadd52_rm opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { // NOTE: The SDNode have the multiply operands first with the add last. // This enables commuted load patterns to be autogenerated by tablegen. let ExeDomain = _.ExeDomain in { defm r: AVX512_maskable_3src, AVX512FMA3Base, Sched<[sched]>; defm m: AVX512_maskable_3src, AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>; defm mb: AVX512_maskable_3src, AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } } } // Constraints = "$src1 = $dst" multiclass avx512_pmadd52_common opc, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { let Predicates = [HasIFMA] in { defm Z : avx512_pmadd52_rm, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; } let Predicates = [HasVLX, HasIFMA] in { defm Z256 : avx512_pmadd52_rm, EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; defm Z128 : avx512_pmadd52_rm, EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; } } defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l, SchedWriteVecIMul, avx512vl_i64_info>, VEX_W; defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h, SchedWriteVecIMul, avx512vl_i64_info>, VEX_W; //===----------------------------------------------------------------------===// // AVX-512 Scalar convert from sign integer to float/double //===----------------------------------------------------------------------===// multiclass avx512_vcvtsi opc, SDNode OpNode, X86FoldableSchedWrite sched, RegisterClass SrcRC, X86VectorVTInfo DstVT, X86MemOperand x86memop, PatFrag ld_frag, string asm> { let hasSideEffects = 0 in { def rr : SI, EVEX_4V, Sched<[sched]>; let mayLoad = 1 in def rm : SI, EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; } // hasSideEffects = 0 let isCodeGenOnly = 1 in { def rr_Int : SI, EVEX_4V, Sched<[sched]>; def rm_Int : SI, EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; }//isCodeGenOnly = 1 } multiclass avx512_vcvtsi_round opc, SDNode OpNode, X86FoldableSchedWrite sched, RegisterClass SrcRC, X86VectorVTInfo DstVT, string asm> { def rrb_Int : SI, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>; } multiclass avx512_vcvtsi_common opc, SDNode OpNode, X86FoldableSchedWrite sched, RegisterClass SrcRC, X86VectorVTInfo DstVT, X86MemOperand x86memop, PatFrag ld_frag, string asm> { defm NAME : avx512_vcvtsi_round, avx512_vcvtsi, VEX_LIG; } let Predicates = [HasAVX512] in { defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SS, GR32, v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SS, GR64, v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">, XS, VEX_W, EVEX_CD8<64, CD8VT1>; defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SD, GR32, v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">, XD, EVEX_CD8<32, CD8VT1>; defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SD, GR64, v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", (VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">; def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", (VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">; def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))), (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))), (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))), (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; def : Pat<(f32 (sint_to_fp GR32:$src)), (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>; def : Pat<(f32 (sint_to_fp GR64:$src)), (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>; def : Pat<(f64 (sint_to_fp GR32:$src)), (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>; def : Pat<(f64 (sint_to_fp GR64:$src)), (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SS, GR32, v4f32x_info, i32mem, loadi32, "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SS, GR64, v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">, XS, VEX_W, EVEX_CD8<64, CD8VT1>; defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, WriteCvtI2SD, GR32, v2f64x_info, i32mem, loadi32, "cvtusi2sd{l}">, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SD, GR64, v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", (VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">; def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", (VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">; def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))), (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))), (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))), (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))), (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; def : Pat<(f32 (uint_to_fp GR32:$src)), (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>; def : Pat<(f32 (uint_to_fp GR64:$src)), (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>; def : Pat<(f64 (uint_to_fp GR32:$src)), (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>; def : Pat<(f64 (uint_to_fp GR64:$src)), (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; } //===----------------------------------------------------------------------===// // AVX-512 Scalar convert from float/double to integer //===----------------------------------------------------------------------===// multiclass avx512_cvt_s_int_round opc, X86VectorVTInfo SrcVT, X86VectorVTInfo DstVT, SDNode OpNode, X86FoldableSchedWrite sched, string asm, string aliasStr, bit CodeGenOnly = 1> { let Predicates = [HasAVX512] in { def rr_Int : SI, EVEX, VEX_LIG, Sched<[sched]>; def rrb_Int : SI, EVEX, VEX_LIG, EVEX_B, EVEX_RC, Sched<[sched]>; let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in def rm_Int : SI, EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>; def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}", (!cast(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">; def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}", (!cast(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">; } // Predicates = [HasAVX512] } multiclass avx512_cvt_s_int_round_aliases opc, X86VectorVTInfo SrcVT, X86VectorVTInfo DstVT, SDNode OpNode, X86FoldableSchedWrite sched, string asm, string aliasStr> : avx512_cvt_s_int_round { let Predicates = [HasAVX512] in { def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}", (!cast(NAME # "rm_Int") DstVT.RC:$dst, SrcVT.IntScalarMemOp:$src), 0, "att">; } // Predicates = [HasAVX512] } // Convert float/double to signed/unsigned int 32/64 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info, X86cvts2si, WriteCvtSS2I, "cvtss2si", "{l}">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si, WriteCvtSS2I, "cvtss2si", "{q}">, XS, VEX_W, EVEX_CD8<32, CD8VT1>; defm VCVTSS2USIZ: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info, X86cvts2usi, WriteCvtSS2I, "cvtss2usi", "{l}">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTSS2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info, X86cvts2usi, WriteCvtSS2I, "cvtss2usi", "{q}">, XS, VEX_W, EVEX_CD8<32, CD8VT1>; defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si, WriteCvtSD2I, "cvtsd2si", "{l}">, XD, EVEX_CD8<64, CD8VT1>; defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si, WriteCvtSD2I, "cvtsd2si", "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; defm VCVTSD2USIZ: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info, X86cvts2usi, WriteCvtSD2I, "cvtsd2usi", "{l}">, XD, EVEX_CD8<64, CD8VT1>; defm VCVTSD2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info, X86cvts2usi, WriteCvtSD2I, "cvtsd2usi", "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; // The SSE version of these instructions are disabled for AVX512. // Therefore, the SSE intrinsics are mapped to the AVX512 instructions. let Predicates = [HasAVX512] in { def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))), (VCVTSS2SIZrr_Int VR128X:$src)>; def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)), (VCVTSS2SIZrm_Int sse_load_f32:$src)>; def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))), (VCVTSS2SI64Zrr_Int VR128X:$src)>; def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)), (VCVTSS2SI64Zrm_Int sse_load_f32:$src)>; def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))), (VCVTSD2SIZrr_Int VR128X:$src)>; def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)), (VCVTSD2SIZrm_Int sse_load_f64:$src)>; def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))), (VCVTSD2SI64Zrr_Int VR128X:$src)>; def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)), (VCVTSD2SI64Zrm_Int sse_load_f64:$src)>; } // HasAVX512 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang // which produce unnecessary vmovs{s,d} instructions let Predicates = [HasAVX512] in { def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))), (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>; def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))), (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>; def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))), (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>; def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))), (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>; def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))), (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>; def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))), (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>; def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))), (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>; def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))), (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>; def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector (f32 (uint_to_fp GR64:$src)))))), (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>; def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi64 addr:$src))))))), (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>; def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector (f32 (uint_to_fp GR32:$src)))))), (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>; def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi32 addr:$src))))))), (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>; def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector (f64 (uint_to_fp GR64:$src)))))), (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>; def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi64 addr:$src))))))), (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>; def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector (f64 (uint_to_fp GR32:$src)))))), (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>; def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi32 addr:$src))))))), (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>; } // Predicates = [HasAVX512] // Convert float/double to signed/unsigned int 32/64 with truncation multiclass avx512_cvt_s_all opc, string asm, X86VectorVTInfo _SrcRC, X86VectorVTInfo _DstRC, SDNode OpNode, SDNode OpNodeRnd, X86FoldableSchedWrite sched, string aliasStr, bit CodeGenOnly = 1>{ let Predicates = [HasAVX512] in { let isCodeGenOnly = 1 in { def rr : AVX512, EVEX, Sched<[sched]>; def rm : AVX512, EVEX, Sched<[sched.Folded, ReadAfterLd]>; } def rr_Int : AVX512, EVEX, VEX_LIG, Sched<[sched]>; def rrb_Int : AVX512, EVEX,VEX_LIG , EVEX_B, Sched<[sched]>; let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in def rm_Int : AVX512, EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>; def : InstAlias(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">; def : InstAlias(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">; } //HasAVX512 } multiclass avx512_cvt_s_all_unsigned opc, string asm, X86VectorVTInfo _SrcRC, X86VectorVTInfo _DstRC, SDNode OpNode, SDNode OpNodeRnd, X86FoldableSchedWrite sched, string aliasStr> : avx512_cvt_s_all { let Predicates = [HasAVX512] in { def : InstAlias(NAME # "rm_Int") _DstRC.RC:$dst, _SrcRC.IntScalarMemOp:$src), 0, "att">; } } defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info, fp_to_sint, X86cvtts2IntRnd, WriteCvtSS2I, "{l}">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info, fp_to_sint, X86cvtts2IntRnd, WriteCvtSS2I, "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>; defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info, fp_to_sint, X86cvtts2IntRnd, WriteCvtSD2I, "{l}">, XD, EVEX_CD8<64, CD8VT1>; defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info, fp_to_sint, X86cvtts2IntRnd, WriteCvtSD2I, "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>; defm VCVTTSS2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i32x_info, fp_to_uint, X86cvtts2UIntRnd, WriteCvtSS2I, "{l}">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTTSS2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i64x_info, fp_to_uint, X86cvtts2UIntRnd, WriteCvtSS2I, "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>; defm VCVTTSD2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i32x_info, fp_to_uint, X86cvtts2UIntRnd, WriteCvtSD2I, "{l}">, XD, EVEX_CD8<64, CD8VT1>; defm VCVTTSD2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i64x_info, fp_to_uint, X86cvtts2UIntRnd, WriteCvtSD2I, "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; let Predicates = [HasAVX512] in { def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))), (VCVTTSS2SIZrr_Int VR128X:$src)>; def : Pat<(i32 (int_x86_sse_cvttss2si sse_load_f32:$src)), (VCVTTSS2SIZrm_Int ssmem:$src)>; def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))), (VCVTTSS2SI64Zrr_Int VR128X:$src)>; def : Pat<(i64 (int_x86_sse_cvttss2si64 sse_load_f32:$src)), (VCVTTSS2SI64Zrm_Int ssmem:$src)>; def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))), (VCVTTSD2SIZrr_Int VR128X:$src)>; def : Pat<(i32 (int_x86_sse2_cvttsd2si sse_load_f64:$src)), (VCVTTSD2SIZrm_Int sdmem:$src)>; def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))), (VCVTTSD2SI64Zrr_Int VR128X:$src)>; def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)), (VCVTTSD2SI64Zrm_Int sdmem:$src)>; } // HasAVX512 //===----------------------------------------------------------------------===// // AVX-512 Convert form float to double and back //===----------------------------------------------------------------------===// multiclass avx512_cvt_fp_scalar opc, string OpcodeStr, X86VectorVTInfo _, X86VectorVTInfo _Src, SDNode OpNode, X86FoldableSchedWrite sched> { defm rr_Int : AVX512_maskable_scalar, EVEX_4V, VEX_LIG, Sched<[sched]>; defm rm_Int : AVX512_maskable_scalar, EVEX_4V, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>; let isCodeGenOnly = 1, hasSideEffects = 0 in { def rr : I, EVEX_4V, VEX_LIG, Sched<[sched]>; let mayLoad = 1 in def rm : I, EVEX_4V, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>; } } // Scalar Coversion with SAE - suppress all exceptions multiclass avx512_cvt_fp_sae_scalar opc, string OpcodeStr, X86VectorVTInfo _, X86VectorVTInfo _Src, SDNode OpNodeRnd, X86FoldableSchedWrite sched> { defm rrb_Int : AVX512_maskable_scalar, EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>; } // Scalar Conversion with rounding control (RC) multiclass avx512_cvt_fp_rc_scalar opc, string OpcodeStr, X86VectorVTInfo _, X86VectorVTInfo _Src, SDNode OpNodeRnd, X86FoldableSchedWrite sched> { defm rrb_Int : AVX512_maskable_scalar, EVEX_4V, VEX_LIG, Sched<[sched]>, EVEX_B, EVEX_RC; } multiclass avx512_cvt_fp_scalar_sd2ss opc, string OpcodeStr, SDNode OpNodeRnd, X86FoldableSchedWrite sched, X86VectorVTInfo _src, X86VectorVTInfo _dst> { let Predicates = [HasAVX512] in { defm Z : avx512_cvt_fp_scalar, avx512_cvt_fp_rc_scalar, VEX_W, EVEX_CD8<64, CD8VT1>, XD; } } multiclass avx512_cvt_fp_scalar_ss2sd opc, string OpcodeStr, SDNode OpNodeRnd, X86FoldableSchedWrite sched, X86VectorVTInfo _src, X86VectorVTInfo _dst> { let Predicates = [HasAVX512] in { defm Z : avx512_cvt_fp_scalar, avx512_cvt_fp_sae_scalar, EVEX_CD8<32, CD8VT1>, XS; } } defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86froundRnd, WriteCvtSD2SS, f64x_info, f32x_info>, NotMemoryFoldable; defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpextRnd, WriteCvtSS2SD, f32x_info, f64x_info>, NotMemoryFoldable; def : Pat<(f64 (fpextend FR32X:$src)), (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>, Requires<[HasAVX512]>; def : Pat<(f64 (fpextend (loadf32 addr:$src))), (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512, OptForSize]>; def : Pat<(f64 (extloadf32 addr:$src)), (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512, OptForSize]>; def : Pat<(f64 (extloadf32 addr:$src)), (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>, Requires<[HasAVX512, OptForSpeed]>; def : Pat<(f32 (fpround FR64X:$src)), (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>, Requires<[HasAVX512]>; def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))), (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>, Requires<[HasAVX512]>; def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))), (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>, Requires<[HasAVX512]>; //===----------------------------------------------------------------------===// // AVX-512 Vector convert from signed/unsigned integer to float/double // and from float/double to signed/unsigned integer //===----------------------------------------------------------------------===// multiclass avx512_vcvt_fp opc, string OpcodeStr, X86VectorVTInfo _, X86VectorVTInfo _Src, SDNode OpNode, X86FoldableSchedWrite sched, string Broadcast = _.BroadcastStr, string Alias = "", X86MemOperand MemOp = _Src.MemOp> { defm rr : AVX512_maskable, EVEX, Sched<[sched]>; defm rm : AVX512_maskable, EVEX, Sched<[sched.Folded]>; defm rmb : AVX512_maskable, EVEX, EVEX_B, Sched<[sched.Folded]>; } // Coversion with SAE - suppress all exceptions multiclass avx512_vcvt_fp_sae opc, string OpcodeStr, X86VectorVTInfo _, X86VectorVTInfo _Src, SDNode OpNodeRnd, X86FoldableSchedWrite sched> { defm rrb : AVX512_maskable, EVEX, EVEX_B, Sched<[sched]>; } // Conversion with rounding control (RC) multiclass avx512_vcvt_fp_rc opc, string OpcodeStr, X86VectorVTInfo _, X86VectorVTInfo _Src, SDNode OpNodeRnd, X86FoldableSchedWrite sched> { defm rrb : AVX512_maskable, EVEX, EVEX_B, EVEX_RC, Sched<[sched]>; } // Extend Float to Double multiclass avx512_cvtps2pd opc, string OpcodeStr, X86SchedWriteWidths sched> { let Predicates = [HasAVX512] in { defm Z : avx512_vcvt_fp, avx512_vcvt_fp_sae, EVEX_V512; } let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fp, EVEX_V128; defm Z256 : avx512_vcvt_fp, EVEX_V256; } } // Truncate Double to Float multiclass avx512_cvtpd2ps opc, string OpcodeStr, X86SchedWriteWidths sched> { let Predicates = [HasAVX512] in { defm Z : avx512_vcvt_fp, avx512_vcvt_fp_rc, EVEX_V512; } let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fp, EVEX_V128; defm Z256 : avx512_vcvt_fp, EVEX_V256; def : InstAlias(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>; def : InstAlias(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">; def : InstAlias(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>; def : InstAlias(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">; } } defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>, VEX_W, PD, EVEX_CD8<64, CD8VF>; defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>, PS, EVEX_CD8<32, CD8VH>; def : Pat<(v8f64 (extloadv8f32 addr:$src)), (VCVTPS2PDZrm addr:$src)>; let Predicates = [HasVLX] in { let AddedComplexity = 15 in { def : Pat<(X86vzmovl (v2f64 (bitconvert (v4f32 (X86vfpround (v2f64 VR128X:$src)))))), (VCVTPD2PSZ128rr VR128X:$src)>; def : Pat<(X86vzmovl (v2f64 (bitconvert (v4f32 (X86vfpround (loadv2f64 addr:$src)))))), (VCVTPD2PSZ128rm addr:$src)>; } def : Pat<(v2f64 (extloadv2f32 addr:$src)), (VCVTPS2PDZ128rm addr:$src)>; def : Pat<(v4f64 (extloadv4f32 addr:$src)), (VCVTPS2PDZ256rm addr:$src)>; } // Convert Signed/Unsigned Doubleword to Double multiclass avx512_cvtdq2pd opc, string OpcodeStr, SDNode OpNode, SDNode OpNode128, X86SchedWriteWidths sched> { // No rounding in this op let Predicates = [HasAVX512] in defm Z : avx512_vcvt_fp, EVEX_V512; let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fp, EVEX_V128; defm Z256 : avx512_vcvt_fp, EVEX_V256; } } // Convert Signed/Unsigned Doubleword to Float multiclass avx512_cvtdq2ps opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, X86SchedWriteWidths sched> { let Predicates = [HasAVX512] in defm Z : avx512_vcvt_fp, avx512_vcvt_fp_rc, EVEX_V512; let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fp, EVEX_V128; defm Z256 : avx512_vcvt_fp, EVEX_V256; } } // Convert Float to Signed/Unsigned Doubleword with truncation multiclass avx512_cvttps2dq opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, X86SchedWriteWidths sched> { let Predicates = [HasAVX512] in { defm Z : avx512_vcvt_fp, avx512_vcvt_fp_sae, EVEX_V512; } let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fp, EVEX_V128; defm Z256 : avx512_vcvt_fp, EVEX_V256; } } // Convert Float to Signed/Unsigned Doubleword multiclass avx512_cvtps2dq opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, X86SchedWriteWidths sched> { let Predicates = [HasAVX512] in { defm Z : avx512_vcvt_fp, avx512_vcvt_fp_rc, EVEX_V512; } let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fp, EVEX_V128; defm Z256 : avx512_vcvt_fp, EVEX_V256; } } // Convert Double to Signed/Unsigned Doubleword with truncation multiclass avx512_cvttpd2dq opc, string OpcodeStr, SDNode OpNode, SDNode OpNode128, SDNode OpNodeRnd, X86SchedWriteWidths sched> { let Predicates = [HasAVX512] in { defm Z : avx512_vcvt_fp, avx512_vcvt_fp_sae, EVEX_V512; } let Predicates = [HasVLX] in { // we need "x"/"y" suffixes in order to distinguish between 128 and 256 // memory forms of these instructions in Asm Parser. They have the same // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly // due to the same reason. defm Z128 : avx512_vcvt_fp, EVEX_V128; defm Z256 : avx512_vcvt_fp, EVEX_V256; def : InstAlias(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>; def : InstAlias(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">; def : InstAlias(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>; def : InstAlias(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">; } } // Convert Double to Signed/Unsigned Doubleword multiclass avx512_cvtpd2dq opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, X86SchedWriteWidths sched> { let Predicates = [HasAVX512] in { defm Z : avx512_vcvt_fp, avx512_vcvt_fp_rc, EVEX_V512; } let Predicates = [HasVLX] in { // we need "x"/"y" suffixes in order to distinguish between 128 and 256 // memory forms of these instructions in Asm Parcer. They have the same // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly // due to the same reason. defm Z128 : avx512_vcvt_fp, EVEX_V128; defm Z256 : avx512_vcvt_fp, EVEX_V256; def : InstAlias(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>; def : InstAlias(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">; def : InstAlias(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>; def : InstAlias(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">; } } // Convert Double to Signed/Unsigned Quardword multiclass avx512_cvtpd2qq opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, X86SchedWriteWidths sched> { let Predicates = [HasDQI] in { defm Z : avx512_vcvt_fp, avx512_vcvt_fp_rc, EVEX_V512; } let Predicates = [HasDQI, HasVLX] in { defm Z128 : avx512_vcvt_fp, EVEX_V128; defm Z256 : avx512_vcvt_fp, EVEX_V256; } } // Convert Double to Signed/Unsigned Quardword with truncation multiclass avx512_cvttpd2qq opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, X86SchedWriteWidths sched> { let Predicates = [HasDQI] in { defm Z : avx512_vcvt_fp, avx512_vcvt_fp_sae, EVEX_V512; } let Predicates = [HasDQI, HasVLX] in { defm Z128 : avx512_vcvt_fp, EVEX_V128; defm Z256 : avx512_vcvt_fp, EVEX_V256; } } // Convert Signed/Unsigned Quardword to Double multiclass avx512_cvtqq2pd opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, X86SchedWriteWidths sched> { let Predicates = [HasDQI] in { defm Z : avx512_vcvt_fp, avx512_vcvt_fp_rc, EVEX_V512; } let Predicates = [HasDQI, HasVLX] in { defm Z128 : avx512_vcvt_fp, EVEX_V128; defm Z256 : avx512_vcvt_fp, EVEX_V256; } } // Convert Float to Signed/Unsigned Quardword multiclass avx512_cvtps2qq opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, X86SchedWriteWidths sched> { let Predicates = [HasDQI] in { defm Z : avx512_vcvt_fp, avx512_vcvt_fp_rc, EVEX_V512; } let Predicates = [HasDQI, HasVLX] in { // Explicitly specified broadcast string, since we take only 2 elements // from v4f32x_info source defm Z128 : avx512_vcvt_fp, EVEX_V128; defm Z256 : avx512_vcvt_fp, EVEX_V256; } } // Convert Float to Signed/Unsigned Quardword with truncation multiclass avx512_cvttps2qq opc, string OpcodeStr, SDNode OpNode, SDNode OpNode128, SDNode OpNodeRnd, X86SchedWriteWidths sched> { let Predicates = [HasDQI] in { defm Z : avx512_vcvt_fp, avx512_vcvt_fp_sae, EVEX_V512; } let Predicates = [HasDQI, HasVLX] in { // Explicitly specified broadcast string, since we take only 2 elements // from v4f32x_info source defm Z128 : avx512_vcvt_fp, EVEX_V128; defm Z256 : avx512_vcvt_fp, EVEX_V256; } } // Convert Signed/Unsigned Quardword to Float multiclass avx512_cvtqq2ps opc, string OpcodeStr, SDNode OpNode, SDNode OpNode128, SDNode OpNodeRnd, X86SchedWriteWidths sched> { let Predicates = [HasDQI] in { defm Z : avx512_vcvt_fp, avx512_vcvt_fp_rc, EVEX_V512; } let Predicates = [HasDQI, HasVLX] in { // we need "x"/"y" suffixes in order to distinguish between 128 and 256 // memory forms of these instructions in Asm Parcer. They have the same // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly // due to the same reason. defm Z128 : avx512_vcvt_fp, EVEX_V128; defm Z256 : avx512_vcvt_fp, EVEX_V256; def : InstAlias(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>; def : InstAlias(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">; def : InstAlias(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>; def : InstAlias(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">; } } defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP, SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>; defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp, X86VSintToFpRnd, SchedWriteCvtDQ2PS>, PS, EVEX_CD8<32, CD8VF>; defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint, X86cvttp2siRnd, SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>; defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, X86cvttp2si, X86cvttp2siRnd, SchedWriteCvtPD2DQ>, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint, X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>; defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint, X86cvttp2ui, X86cvttp2uiRnd, SchedWriteCvtPD2DQ>, PS, VEX_W, EVEX_CD8<64, CD8VF>; defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, X86VUintToFP, SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>; defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>; defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD, EVEX_CD8<32, CD8VF>; defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD, VEX_W, EVEX_CD8<64, CD8VF>; defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>; defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W, PS, EVEX_CD8<64, CD8VF>; defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W, PD, EVEX_CD8<64, CD8VF>; defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD, EVEX_CD8<32, CD8VH>; defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W, PD, EVEX_CD8<64, CD8VF>; defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD, EVEX_CD8<32, CD8VH>; defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint, X86cvttp2siRnd, SchedWriteCvtPD2DQ>, VEX_W, PD, EVEX_CD8<64, CD8VF>; defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint, X86cvttp2si, X86cvttp2siRnd, SchedWriteCvtPS2DQ>, PD, EVEX_CD8<32, CD8VH>; defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint, X86cvttp2uiRnd, SchedWriteCvtPD2DQ>, VEX_W, PD, EVEX_CD8<64, CD8VF>; defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint, X86cvttp2ui, X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PD, EVEX_CD8<32, CD8VH>; defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp, X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>; defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>; defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP, X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS, EVEX_CD8<64, CD8VF>; defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP, X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD, EVEX_CD8<64, CD8VF>; let Predicates = [HasAVX512, NoVLX] in { def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))), (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>; def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))), (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))), sub_xmm)>; def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))), (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_xmm)>; def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))), (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>; def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))), (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))), sub_xmm)>; def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))), (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr (v8i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))), sub_ymm)>; def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))), (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr (v8i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))), sub_xmm)>; } let Predicates = [HasAVX512, HasVLX] in { let AddedComplexity = 15 in { def : Pat<(X86vzmovl (v2i64 (bitconvert (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))), (VCVTPD2DQZ128rr VR128X:$src)>; def : Pat<(X86vzmovl (v2i64 (bitconvert (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))), (VCVTPD2DQZ128rm addr:$src)>; def : Pat<(X86vzmovl (v2i64 (bitconvert (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))), (VCVTPD2UDQZ128rr VR128X:$src)>; def : Pat<(X86vzmovl (v2i64 (bitconvert (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))), (VCVTTPD2DQZ128rr VR128X:$src)>; def : Pat<(X86vzmovl (v2i64 (bitconvert (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))), (VCVTTPD2DQZ128rm addr:$src)>; def : Pat<(X86vzmovl (v2i64 (bitconvert (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))), (VCVTTPD2UDQZ128rr VR128X:$src)>; } def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (VCVTDQ2PDZ128rm addr:$src)>; def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))), (VCVTDQ2PDZ128rm addr:$src)>; def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (VCVTUDQ2PDZ128rm addr:$src)>; def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))), (VCVTUDQ2PDZ128rm addr:$src)>; } let Predicates = [HasAVX512] in { def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))), (VCVTPD2PSZrm addr:$src)>; def : Pat<(v8f64 (extloadv8f32 addr:$src)), (VCVTPS2PDZrm addr:$src)>; } let Predicates = [HasDQI, HasVLX] in { let AddedComplexity = 15 in { def : Pat<(X86vzmovl (v2f64 (bitconvert (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))), (VCVTQQ2PSZ128rr VR128X:$src)>; def : Pat<(X86vzmovl (v2f64 (bitconvert (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))), (VCVTUQQ2PSZ128rr VR128X:$src)>; } } let Predicates = [HasDQI, NoVLX] in { def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))), (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))), sub_xmm)>; def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))), (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr (v8f32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))), sub_ymm)>; def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))), (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>; def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))), (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))), sub_xmm)>; def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))), (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr (v8f32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))), sub_ymm)>; def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))), (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>; def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))), (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_xmm)>; def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))), (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))), sub_xmm)>; def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))), (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>; def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))), (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_xmm)>; def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))), (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))), sub_xmm)>; def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))), (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>; } //===----------------------------------------------------------------------===// // Half precision conversion instructions //===----------------------------------------------------------------------===// multiclass avx512_cvtph2ps { defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src), "vcvtph2ps", "$src", "$src", (X86cvtph2ps (_src.VT _src.RC:$src))>, T8PD, Sched<[sched]>; defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), (ins x86memop:$src), "vcvtph2ps", "$src", "$src", (X86cvtph2ps (_src.VT (bitconvert (ld_frag addr:$src))))>, T8PD, Sched<[sched.Folded]>; } multiclass avx512_cvtph2ps_sae { defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst), (ins _src.RC:$src), "vcvtph2ps", "{sae}, $src", "$src, {sae}", (X86cvtph2psRnd (_src.VT _src.RC:$src), (i32 FROUND_NO_EXC))>, T8PD, EVEX_B, Sched<[sched]>; } let Predicates = [HasAVX512] in defm VCVTPH2PSZ : avx512_cvtph2ps, avx512_cvtph2ps_sae, EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; let Predicates = [HasVLX] in { defm VCVTPH2PSZ256 : avx512_cvtph2ps, EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; defm VCVTPH2PSZ128 : avx512_cvtph2ps, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; // Pattern match vcvtph2ps of a scalar i64 load. def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))), (VCVTPH2PSZ128rm addr:$src)>; def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))), (VCVTPH2PSZ128rm addr:$src)>; def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), (VCVTPH2PSZ128rm addr:$src)>; } multiclass avx512_cvtps2ph { defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph", "$src2, $src1", "$src1, $src2", (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2)), 0, 0>, AVX512AIi8Base, Sched<[RR]>; let hasSideEffects = 0, mayStore = 1 in { def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, Sched<[MR]>; def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs), (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>, EVEX_K, Sched<[MR]>; } } multiclass avx512_cvtps2ph_sae { let hasSideEffects = 0 in defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest, (outs _dest.RC:$dst), (ins _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>, EVEX_B, AVX512AIi8Base, Sched<[Sched]>; } let Predicates = [HasAVX512] in { defm VCVTPS2PHZ : avx512_cvtps2ph, avx512_cvtps2ph_sae, EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; let Predicates = [HasVLX] in { defm VCVTPS2PHZ256 : avx512_cvtps2ph, EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; defm VCVTPS2PHZ128 : avx512_cvtps2ph, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; } def : Pat<(store (f64 (extractelt (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))), (iPTR 0))), addr:$dst), (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>; def : Pat<(store (i64 (extractelt (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))), (iPTR 0))), addr:$dst), (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>; def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst), (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>; def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst), (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>; } // Patterns for matching conversions from float to half-float and vice versa. let Predicates = [HasVLX] in { // Use MXCSR.RC for rounding instead of explicitly specifying the default // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the // configurations we support (the default). However, falling back to MXCSR is // more consistent with other instructions, which are always controlled by it. // It's encoded as 0b100. def : Pat<(fp_to_f16 FR32X:$src), (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (VCVTPS2PHZ128rr (COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), sub_16bit))>; def : Pat<(f16_to_fp GR16:$src), (f32 (COPY_TO_REGCLASS (VCVTPH2PSZ128rr (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)), FR32X)) >; def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))), (f32 (COPY_TO_REGCLASS (VCVTPH2PSZ128rr (VCVTPS2PHZ128rr (COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), FR32X)) >; } // Unordered/Ordered scalar fp compare with Sea and set EFLAGS multiclass avx512_ord_cmp_sae opc, X86VectorVTInfo _, string OpcodeStr, X86FoldableSchedWrite sched> { let hasSideEffects = 0 in def rrb: AVX512, EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>; } let Defs = [EFLAGS], Predicates = [HasAVX512] in { defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>, AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>, AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>, AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>, AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; } let Defs = [EFLAGS], Predicates = [HasAVX512] in { defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64, "ucomisd", WriteFCom>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; let Pattern = [] in { defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64, "comisd", WriteFCom>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } let isCodeGenOnly = 1 in { defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem, sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem, sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem, sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem, sse_load_f64, "comisd", WriteFCom>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } } /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd multiclass avx512_fp14_s opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in { defm rr : AVX512_maskable_scalar, EVEX_4V, Sched<[sched]>; defm rm : AVX512_maskable_scalar, EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; } } defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl, f32x_info>, EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable; defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl, f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable; defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, SchedWriteFRsqrt.Scl, f32x_info>, EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable; defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, SchedWriteFRsqrt.Scl, f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable; /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd multiclass avx512_fp14_p opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm r: AVX512_maskable, EVEX, T8PD, Sched<[sched]>; defm m: AVX512_maskable, EVEX, T8PD, Sched<[sched.Folded, ReadAfterLd]>; defm mb: AVX512_maskable, EVEX, T8PD, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_fp14_p_vl_all opc, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched> { defm PSZ : avx512_fp14_p, EVEX_V512, EVEX_CD8<32, CD8VF>; defm PDZ : avx512_fp14_p, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { defm PSZ128 : avx512_fp14_p, EVEX_V128, EVEX_CD8<32, CD8VF>; defm PSZ256 : avx512_fp14_p, EVEX_V256, EVEX_CD8<32, CD8VF>; defm PDZ128 : avx512_fp14_p, EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; defm PDZ256 : avx512_fp14_p, EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; } } defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>; defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>; /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd multiclass avx512_fp28_s opc, string OpcodeStr,X86VectorVTInfo _, SDNode OpNode, X86FoldableSchedWrite sched> { let ExeDomain = _.ExeDomain in { defm r : AVX512_maskable_scalar, Sched<[sched]>; defm rb : AVX512_maskable_scalar, EVEX_B, Sched<[sched]>; defm m : AVX512_maskable_scalar, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_eri_s opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched> { defm SS : avx512_fp28_s, EVEX_CD8<32, CD8VT1>; defm SD : avx512_fp28_s, EVEX_CD8<64, CD8VT1>, VEX_W; } let Predicates = [HasERI] in { defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SchedWriteFRcp.Scl>, T8PD, EVEX_4V; defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V; } defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds, SchedWriteFRnd.Scl>, T8PD, EVEX_4V; /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd multiclass avx512_fp28_p opc, string OpcodeStr, X86VectorVTInfo _, SDNode OpNode, X86FoldableSchedWrite sched> { let ExeDomain = _.ExeDomain in { defm r : AVX512_maskable, Sched<[sched]>; defm m : AVX512_maskable, Sched<[sched.Folded, ReadAfterLd]>; defm mb : AVX512_maskable, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_fp28_p_round opc, string OpcodeStr, X86VectorVTInfo _, SDNode OpNode, X86FoldableSchedWrite sched> { let ExeDomain = _.ExeDomain in defm rb : AVX512_maskable, EVEX_B, Sched<[sched]>; } multiclass avx512_eri opc, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched> { defm PS : avx512_fp28_p, avx512_fp28_p_round, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; defm PD : avx512_fp28_p, avx512_fp28_p_round, T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; } multiclass avx512_fp_unaryop_packed opc, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched> { // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { defm PSZ128 : avx512_fp28_p, EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>; defm PSZ256 : avx512_fp28_p, EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>; defm PDZ128 : avx512_fp28_p, EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; defm PDZ256 : avx512_fp28_p, EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; } } let Predicates = [HasERI] in { defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SchedWriteFRsqrt>, EVEX; defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, SchedWriteFRcp>, EVEX; defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, SchedWriteFAdd>, EVEX; } defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SchedWriteFRnd>, avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd, SchedWriteFRnd>, EVEX; multiclass avx512_sqrt_packed_round opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _>{ let ExeDomain = _.ExeDomain in defm rb: AVX512_maskable, EVEX, EVEX_B, EVEX_RC, Sched<[sched]>; } multiclass avx512_sqrt_packed opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _>{ let ExeDomain = _.ExeDomain in { defm r: AVX512_maskable, EVEX, Sched<[sched]>; defm m: AVX512_maskable, EVEX, Sched<[sched.Folded, ReadAfterLd]>; defm mb: AVX512_maskable, EVEX, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_sqrt_packed_all opc, string OpcodeStr, X86SchedWriteSizes sched> { defm PSZ : avx512_sqrt_packed, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; defm PDZ : avx512_sqrt_packed, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { defm PSZ128 : avx512_sqrt_packed, EVEX_V128, PS, EVEX_CD8<32, CD8VF>; defm PSZ256 : avx512_sqrt_packed, EVEX_V256, PS, EVEX_CD8<32, CD8VF>; defm PDZ128 : avx512_sqrt_packed, EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>; defm PDZ256 : avx512_sqrt_packed, EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>; } } multiclass avx512_sqrt_packed_all_round opc, string OpcodeStr, X86SchedWriteSizes sched> { defm PSZ : avx512_sqrt_packed_round, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; defm PDZ : avx512_sqrt_packed_round, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; } multiclass avx512_sqrt_scalar opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _, string Name, Intrinsic Intr> { let ExeDomain = _.ExeDomain in { defm r_Int : AVX512_maskable_scalar, Sched<[sched]>; defm m_Int : AVX512_maskable_scalar, Sched<[sched.Folded, ReadAfterLd]>; defm rb_Int : AVX512_maskable_scalar, EVEX_B, EVEX_RC, Sched<[sched]>; let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in { def r : I, Sched<[sched]>; let mayLoad = 1 in def m : I, Sched<[sched.Folded, ReadAfterLd]>; } } let Predicates = [HasAVX512] in { def : Pat<(_.EltVT (fsqrt _.FRC:$src)), (!cast(Name#Zr) (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>; def : Pat<(Intr VR128X:$src), (!cast(Name#Zr_Int) VR128X:$src, VR128X:$src)>; } let Predicates = [HasAVX512, OptForSize] in { def : Pat<(_.EltVT (fsqrt (load addr:$src))), (!cast(Name#Zm) (_.EltVT (IMPLICIT_DEF)), addr:$src)>; def : Pat<(Intr _.ScalarIntMemCPat:$src2), (!cast(Name#Zm_Int) (_.VT (IMPLICIT_DEF)), addr:$src2)>; } } multiclass avx512_sqrt_scalar_all opc, string OpcodeStr, X86SchedWriteSizes sched> { defm SSZ : avx512_sqrt_scalar, EVEX_CD8<32, CD8VT1>, EVEX_4V, XS, NotMemoryFoldable; defm SDZ : avx512_sqrt_scalar, EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W, NotMemoryFoldable; } defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>; defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG; multiclass avx512_rndscale_scalar opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm r_Int : AVX512_maskable_scalar, Sched<[sched]>; defm rb_Int : AVX512_maskable_scalar, EVEX_B, Sched<[sched]>; defm m_Int : AVX512_maskable_scalar, Sched<[sched.Folded, ReadAfterLd]>; let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in { def r : I, Sched<[sched]>; let mayLoad = 1 in def m : I, Sched<[sched.Folded, ReadAfterLd]>; } } let Predicates = [HasAVX512] in { def : Pat<(ffloor _.FRC:$src), (_.EltVT (!cast(NAME##r) (_.EltVT (IMPLICIT_DEF)), _.FRC:$src, (i32 0x9)))>; def : Pat<(fceil _.FRC:$src), (_.EltVT (!cast(NAME##r) (_.EltVT (IMPLICIT_DEF)), _.FRC:$src, (i32 0xa)))>; def : Pat<(ftrunc _.FRC:$src), (_.EltVT (!cast(NAME##r) (_.EltVT (IMPLICIT_DEF)), _.FRC:$src, (i32 0xb)))>; def : Pat<(frint _.FRC:$src), (_.EltVT (!cast(NAME##r) (_.EltVT (IMPLICIT_DEF)), _.FRC:$src, (i32 0x4)))>; def : Pat<(fnearbyint _.FRC:$src), (_.EltVT (!cast(NAME##r) (_.EltVT (IMPLICIT_DEF)), _.FRC:$src, (i32 0xc)))>; } let Predicates = [HasAVX512, OptForSize] in { def : Pat<(ffloor (_.ScalarLdFrag addr:$src)), (_.EltVT (!cast(NAME##m) (_.EltVT (IMPLICIT_DEF)), addr:$src, (i32 0x9)))>; def : Pat<(fceil (_.ScalarLdFrag addr:$src)), (_.EltVT (!cast(NAME##m) (_.EltVT (IMPLICIT_DEF)), addr:$src, (i32 0xa)))>; def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)), (_.EltVT (!cast(NAME##m) (_.EltVT (IMPLICIT_DEF)), addr:$src, (i32 0xb)))>; def : Pat<(frint (_.ScalarLdFrag addr:$src)), (_.EltVT (!cast(NAME##m) (_.EltVT (IMPLICIT_DEF)), addr:$src, (i32 0x4)))>; def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)), (_.EltVT (!cast(NAME##m) (_.EltVT (IMPLICIT_DEF)), addr:$src, (i32 0xc)))>; } } defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", SchedWriteFRnd.Scl, f32x_info>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>; defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", SchedWriteFRnd.Scl, f64x_info>, VEX_W, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VT1>; multiclass avx512_masked_scalar { let Predicates = [BasePredicate] in { def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask, (OpNode (extractelt _.VT:$src2, (iPTR 0))), (extractelt _.VT:$dst, (iPTR 0))))), (!cast("V"#OpcPrefix#r_Intk) _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>; def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask, (OpNode (extractelt _.VT:$src2, (iPTR 0))), ZeroFP))), (!cast("V"#OpcPrefix#r_Intkz) OutMask, _.VT:$src2, _.VT:$src1)>; } } multiclass avx512_masked_scalar_imm ImmV, dag OutMask, Predicate BasePredicate> { let Predicates = [BasePredicate] in { def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask, (OpNode (extractelt _.VT:$src2, (iPTR 0))), (extractelt _.VT:$dst, (iPTR 0))))), (!cast("V"#OpcPrefix#r_Intk) _.VT:$dst, OutMask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>; def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask, (OpNode (extractelt _.VT:$src2, (iPTR 0))), ZeroFP))), (!cast("V"#OpcPrefix#r_Intkz) OutMask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>; } } //------------------------------------------------- // Integer truncate and extend operations //------------------------------------------------- multiclass avx512_trunc_common opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo, X86VectorVTInfo DestInfo, X86MemOperand x86memop> { let ExeDomain = DestInfo.ExeDomain in defm rr : AVX512_maskable, EVEX, T8XS, Sched<[sched]>; let mayStore = 1, mayLoad = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in { def mr : AVX512XS8I, EVEX, Sched<[sched.Folded]>; def mrk : AVX512XS8I, EVEX, EVEX_K, Sched<[sched.Folded]>; }//mayStore = 1, mayLoad = 1, hasSideEffects = 0 } multiclass avx512_trunc_mr_lowering { def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst), (!cast(Name#SrcInfo.ZSuffix##mr) addr:$dst, SrcInfo.RC:$src)>; def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask, (SrcInfo.VT SrcInfo.RC:$src)), (!cast(Name#SrcInfo.ZSuffix##mrk) addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>; } multiclass avx512_trunc opc, string OpcodeStr, SDNode OpNode128, SDNode OpNode256, SDNode OpNode512, X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128, X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, X86MemOperand x86memopZ128, X86MemOperand x86memopZ256, X86MemOperand x86memopZ, PatFrag truncFrag, PatFrag mtruncFrag, Predicate prd = HasAVX512>{ let Predicates = [HasVLX, prd] in { defm Z128: avx512_trunc_common, avx512_trunc_mr_lowering, EVEX_V128; defm Z256: avx512_trunc_common, avx512_trunc_mr_lowering, EVEX_V256; } let Predicates = [prd] in defm Z: avx512_trunc_common, avx512_trunc_mr_lowering, EVEX_V512; } multiclass avx512_trunc_qb opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, PatFrag StoreNode, PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { defm NAME: avx512_trunc, EVEX_CD8<8, CD8VO>; } multiclass avx512_trunc_qw opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, PatFrag StoreNode, PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { defm NAME: avx512_trunc, EVEX_CD8<16, CD8VQ>; } multiclass avx512_trunc_qd opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, PatFrag StoreNode, PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { defm NAME: avx512_trunc, EVEX_CD8<32, CD8VH>; } multiclass avx512_trunc_db opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, PatFrag StoreNode, PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { defm NAME: avx512_trunc, EVEX_CD8<8, CD8VQ>; } multiclass avx512_trunc_dw opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, PatFrag StoreNode, PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { defm NAME: avx512_trunc, EVEX_CD8<16, CD8VH>; } multiclass avx512_trunc_wb opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, PatFrag StoreNode, PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { defm NAME: avx512_trunc, EVEX_CD8<16, CD8VH>; } defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, WriteShuffle256, truncstorevi8, masked_truncstorevi8, X86vtrunc>; defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, WriteShuffle256, truncstore_s_vi8, masked_truncstore_s_vi8>; defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, WriteShuffle256, truncstore_us_vi8, masked_truncstore_us_vi8>; defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, WriteShuffle256, truncstorevi16, masked_truncstorevi16, X86vtrunc>; defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, WriteShuffle256, truncstore_s_vi16, masked_truncstore_s_vi16>; defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, WriteShuffle256, truncstore_us_vi16, masked_truncstore_us_vi16>; defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, WriteShuffle256, truncstorevi32, masked_truncstorevi32, X86vtrunc>; defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, WriteShuffle256, truncstore_s_vi32, masked_truncstore_s_vi32>; defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, WriteShuffle256, truncstore_us_vi32, masked_truncstore_us_vi32>; defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, WriteShuffle256, truncstorevi8, masked_truncstorevi8, X86vtrunc>; defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, WriteShuffle256, truncstore_s_vi8, masked_truncstore_s_vi8>; defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, WriteShuffle256, truncstore_us_vi8, masked_truncstore_us_vi8>; defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, WriteShuffle256, truncstorevi16, masked_truncstorevi16, X86vtrunc>; defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, WriteShuffle256, truncstore_s_vi16, masked_truncstore_s_vi16>; defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, WriteShuffle256, truncstore_us_vi16, masked_truncstore_us_vi16>; defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, WriteShuffle256, truncstorevi8, masked_truncstorevi8, X86vtrunc>; defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, WriteShuffle256, truncstore_s_vi8, masked_truncstore_s_vi8>; defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, WriteShuffle256, truncstore_us_vi8, masked_truncstore_us_vi8>; let Predicates = [HasAVX512, NoVLX] in { def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))), (v8i16 (EXTRACT_SUBREG (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src, sub_ymm)))), sub_xmm))>; def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))), (v4i32 (EXTRACT_SUBREG (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src, sub_ymm)))), sub_xmm))>; } let Predicates = [HasBWI, NoVLX] in { def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))), (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src, sub_ymm))), sub_xmm))>; } multiclass WriteShuffle256_common opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo, X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{ let ExeDomain = DestInfo.ExeDomain in { defm rr : AVX512_maskable, EVEX, Sched<[sched]>; defm rm : AVX512_maskable, EVEX, Sched<[sched.Folded]>; } } multiclass WriteShuffle256_BW opc, string OpcodeStr, SDNode OpNode, SDNode InVecNode, string ExtTy, X86FoldableSchedWrite sched, PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { let Predicates = [HasVLX, HasBWI] in { defm Z128: WriteShuffle256_common, EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG; defm Z256: WriteShuffle256_common, EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG; } let Predicates = [HasBWI] in { defm Z : WriteShuffle256_common, EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG; } } multiclass WriteShuffle256_BD opc, string OpcodeStr, SDNode OpNode, SDNode InVecNode, string ExtTy, X86FoldableSchedWrite sched, PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { let Predicates = [HasVLX, HasAVX512] in { defm Z128: WriteShuffle256_common, EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG; defm Z256: WriteShuffle256_common, EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG; } let Predicates = [HasAVX512] in { defm Z : WriteShuffle256_common, EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG; } } multiclass WriteShuffle256_BQ opc, string OpcodeStr, SDNode OpNode, SDNode InVecNode, string ExtTy, X86FoldableSchedWrite sched, PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { let Predicates = [HasVLX, HasAVX512] in { defm Z128: WriteShuffle256_common, EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG; defm Z256: WriteShuffle256_common, EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG; } let Predicates = [HasAVX512] in { defm Z : WriteShuffle256_common, EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG; } } multiclass WriteShuffle256_WD opc, string OpcodeStr, SDNode OpNode, SDNode InVecNode, string ExtTy, X86FoldableSchedWrite sched, PatFrag LdFrag = !cast(ExtTy#"extloadvi16")> { let Predicates = [HasVLX, HasAVX512] in { defm Z128: WriteShuffle256_common, EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG; defm Z256: WriteShuffle256_common, EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG; } let Predicates = [HasAVX512] in { defm Z : WriteShuffle256_common, EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG; } } multiclass WriteShuffle256_WQ opc, string OpcodeStr, SDNode OpNode, SDNode InVecNode, string ExtTy, X86FoldableSchedWrite sched, PatFrag LdFrag = !cast(ExtTy#"extloadvi16")> { let Predicates = [HasVLX, HasAVX512] in { defm Z128: WriteShuffle256_common, EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG; defm Z256: WriteShuffle256_common, EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG; } let Predicates = [HasAVX512] in { defm Z : WriteShuffle256_common, EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG; } } multiclass WriteShuffle256_DQ opc, string OpcodeStr, SDNode OpNode, SDNode InVecNode, string ExtTy, X86FoldableSchedWrite sched, PatFrag LdFrag = !cast(ExtTy#"extloadvi32")> { let Predicates = [HasVLX, HasAVX512] in { defm Z128: WriteShuffle256_common, EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128; defm Z256: WriteShuffle256_common, EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256; } let Predicates = [HasAVX512] in { defm Z : WriteShuffle256_common, EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512; } } defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z", WriteShuffle256>; defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z", WriteShuffle256>; defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z", WriteShuffle256>; defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z", WriteShuffle256>; defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z", WriteShuffle256>; defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z", WriteShuffle256>; defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s", WriteShuffle256>; defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s", WriteShuffle256>; defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s", WriteShuffle256>; defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s", WriteShuffle256>; defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s", WriteShuffle256>; defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s", WriteShuffle256>; multiclass AVX512_pmovx_patterns { // 128-bit patterns let Predicates = [HasVLX, HasBWI] in { def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (!cast(OpcPrefix#BWZ128rm) addr:$src)>; def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), (!cast(OpcPrefix#BWZ128rm) addr:$src)>; def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))), (!cast(OpcPrefix#BWZ128rm) addr:$src)>; def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#BWZ128rm) addr:$src)>; def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))), (!cast(OpcPrefix#BWZ128rm) addr:$src)>; } let Predicates = [HasVLX] in { def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))), (!cast(OpcPrefix#BDZ128rm) addr:$src)>; def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))), (!cast(OpcPrefix#BDZ128rm) addr:$src)>; def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#BDZ128rm) addr:$src)>; def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))), (!cast(OpcPrefix#BDZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))), (!cast(OpcPrefix#BQZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))), (!cast(OpcPrefix#BQZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#BQZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))), (!cast(OpcPrefix#BQZ128rm) addr:$src)>; def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (!cast(OpcPrefix#WDZ128rm) addr:$src)>; def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), (!cast(OpcPrefix#WDZ128rm) addr:$src)>; def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))), (!cast(OpcPrefix#WDZ128rm) addr:$src)>; def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#WDZ128rm) addr:$src)>; def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))), (!cast(OpcPrefix#WDZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))), (!cast(OpcPrefix#WQZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))), (!cast(OpcPrefix#WQZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#WQZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))), (!cast(OpcPrefix#WQZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (!cast(OpcPrefix#DQZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), (!cast(OpcPrefix#DQZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))), (!cast(OpcPrefix#DQZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#DQZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))), (!cast(OpcPrefix#DQZ128rm) addr:$src)>; } // 256-bit patterns let Predicates = [HasVLX, HasBWI] in { def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))), (!cast(OpcPrefix#BWZ256rm) addr:$src)>; def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))), (!cast(OpcPrefix#BWZ256rm) addr:$src)>; def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#BWZ256rm) addr:$src)>; } let Predicates = [HasVLX] in { def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (!cast(OpcPrefix#BDZ256rm) addr:$src)>; def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))), (!cast(OpcPrefix#BDZ256rm) addr:$src)>; def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#BDZ256rm) addr:$src)>; def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))), (!cast(OpcPrefix#BDZ256rm) addr:$src)>; def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))), (!cast(OpcPrefix#BQZ256rm) addr:$src)>; def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))), (!cast(OpcPrefix#BQZ256rm) addr:$src)>; def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#BQZ256rm) addr:$src)>; def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))), (!cast(OpcPrefix#BQZ256rm) addr:$src)>; def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))), (!cast(OpcPrefix#WDZ256rm) addr:$src)>; def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))), (!cast(OpcPrefix#WDZ256rm) addr:$src)>; def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#WDZ256rm) addr:$src)>; def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (!cast(OpcPrefix#WQZ256rm) addr:$src)>; def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))), (!cast(OpcPrefix#WQZ256rm) addr:$src)>; def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#WQZ256rm) addr:$src)>; def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))), (!cast(OpcPrefix#WQZ256rm) addr:$src)>; def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))), (!cast(OpcPrefix#DQZ256rm) addr:$src)>; def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))), (!cast(OpcPrefix#DQZ256rm) addr:$src)>; def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#DQZ256rm) addr:$src)>; } // 512-bit patterns let Predicates = [HasBWI] in { def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))), (!cast(OpcPrefix#BWZrm) addr:$src)>; } let Predicates = [HasAVX512] in { def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))), (!cast(OpcPrefix#BDZrm) addr:$src)>; def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (!cast(OpcPrefix#BQZrm) addr:$src)>; def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))), (!cast(OpcPrefix#BQZrm) addr:$src)>; def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))), (!cast(OpcPrefix#WDZrm) addr:$src)>; def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))), (!cast(OpcPrefix#WQZrm) addr:$src)>; def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))), (!cast(OpcPrefix#DQZrm) addr:$src)>; } } defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec>; defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec>; //===----------------------------------------------------------------------===// // GATHER - SCATTER Operations // FIXME: Improve scheduling of gather/scatter instructions. multiclass avx512_gather opc, string OpcodeStr, X86VectorVTInfo _, X86MemOperand memop, PatFrag GatherNode, RegisterClass MaskRC = _.KRCWM> { let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb", ExeDomain = _.ExeDomain in def rm : AVX5128I, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>; } multiclass avx512_gather_q_pd dopc, bits<8> qopc, AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { defm NAME##D##SUFF##Z: avx512_gather, EVEX_V512, VEX_W; defm NAME##Q##SUFF##Z: avx512_gather, EVEX_V512, VEX_W; let Predicates = [HasVLX] in { defm NAME##D##SUFF##Z256: avx512_gather, EVEX_V256, VEX_W; defm NAME##Q##SUFF##Z256: avx512_gather, EVEX_V256, VEX_W; defm NAME##D##SUFF##Z128: avx512_gather, EVEX_V128, VEX_W; defm NAME##Q##SUFF##Z128: avx512_gather, EVEX_V128, VEX_W; } } multiclass avx512_gather_d_ps dopc, bits<8> qopc, AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { defm NAME##D##SUFF##Z: avx512_gather, EVEX_V512; defm NAME##Q##SUFF##Z: avx512_gather, EVEX_V512; let Predicates = [HasVLX] in { defm NAME##D##SUFF##Z256: avx512_gather, EVEX_V256; defm NAME##Q##SUFF##Z256: avx512_gather, EVEX_V256; defm NAME##D##SUFF##Z128: avx512_gather, EVEX_V128; defm NAME##Q##SUFF##Z128: avx512_gather, EVEX_V128; } } defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">, avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">; defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">, avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">; multiclass avx512_scatter opc, string OpcodeStr, X86VectorVTInfo _, X86MemOperand memop, PatFrag ScatterNode, RegisterClass MaskRC = _.KRCWM> { let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in def mr : AVX5128I, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteStore]>; } multiclass avx512_scatter_q_pd dopc, bits<8> qopc, AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { defm NAME##D##SUFF##Z: avx512_scatter, EVEX_V512, VEX_W; defm NAME##Q##SUFF##Z: avx512_scatter, EVEX_V512, VEX_W; let Predicates = [HasVLX] in { defm NAME##D##SUFF##Z256: avx512_scatter, EVEX_V256, VEX_W; defm NAME##Q##SUFF##Z256: avx512_scatter, EVEX_V256, VEX_W; defm NAME##D##SUFF##Z128: avx512_scatter, EVEX_V128, VEX_W; defm NAME##Q##SUFF##Z128: avx512_scatter, EVEX_V128, VEX_W; } } multiclass avx512_scatter_d_ps dopc, bits<8> qopc, AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { defm NAME##D##SUFF##Z: avx512_scatter, EVEX_V512; defm NAME##Q##SUFF##Z: avx512_scatter, EVEX_V512; let Predicates = [HasVLX] in { defm NAME##D##SUFF##Z256: avx512_scatter, EVEX_V256; defm NAME##Q##SUFF##Z256: avx512_scatter, EVEX_V256; defm NAME##D##SUFF##Z128: avx512_scatter, EVEX_V128; defm NAME##Q##SUFF##Z128: avx512_scatter, EVEX_V128; } } defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">, avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">; defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">, avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">; // prefetch multiclass avx512_gather_scatter_prefetch opc, Format F, string OpcodeStr, RegisterClass KRC, X86MemOperand memop> { let Predicates = [HasPFI], hasSideEffects = 1 in def m : AVX5128I, EVEX, EVEX_K, Sched<[WriteLoad]>; } defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps", VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps", VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>; defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd", VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>; defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd", VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps", VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps", VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>; defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd", VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>; defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd", VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps", VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps", VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>; defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd", VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>; defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd", VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps", VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps", VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>; defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd", VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>; defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd", VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; multiclass cvt_by_vec_width opc, X86VectorVTInfo Vec, string OpcodeStr > { def rr : AVX512XS8I, EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc? } multiclass cvt_mask_by_elt_width opc, AVX512VLVectorVTInfo VTInfo, string OpcodeStr, Predicate prd> { let Predicates = [prd] in defm Z : cvt_by_vec_width, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : cvt_by_vec_width, EVEX_V256; defm Z128 : cvt_by_vec_width, EVEX_V128; } } defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>; defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W; defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>; defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W; multiclass convert_vector_to_mask_common opc, X86VectorVTInfo _, string OpcodeStr > { def rr : AVX512XS8I, EVEX, Sched<[WriteMove]>; } // Use 512bit version to implement 128/256 bit in case NoVLX. multiclass convert_vector_to_mask_lowering { def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))), (_.KVT (COPY_TO_REGCLASS (!cast(Name#"Zrr") (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), _.RC:$src, _.SubRegIdx)), _.KRC))>; } multiclass avx512_convert_vector_to_mask opc, string OpcodeStr, AVX512VLVectorVTInfo VTInfo, Predicate prd> { let Predicates = [prd] in defm Z : convert_vector_to_mask_common , EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : convert_vector_to_mask_common, EVEX_V256; defm Z128 : convert_vector_to_mask_common, EVEX_V128; } let Predicates = [prd, NoVLX] in { defm Z256_Alt : convert_vector_to_mask_lowering; defm Z128_Alt : convert_vector_to_mask_lowering; } } defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m", avx512vl_i8_info, HasBWI>; defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m", avx512vl_i16_info, HasBWI>, VEX_W; defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m", avx512vl_i32_info, HasDQI>; defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m", avx512vl_i64_info, HasDQI>, VEX_W; // Patterns for handling sext from a mask register to v16i8/v16i16 when DQI // is available, but BWI is not. We can't handle this in lowering because // a target independent DAG combine likes to combine sext and trunc. let Predicates = [HasDQI, NoBWI] in { def : Pat<(v16i8 (sext (v16i1 VK16:$src))), (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>; def : Pat<(v16i16 (sext (v16i1 VK16:$src))), (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>; } //===----------------------------------------------------------------------===// // AVX-512 - COMPRESS and EXPAND // multiclass compress_by_vec_width_common opc, X86VectorVTInfo _, string OpcodeStr, X86FoldableSchedWrite sched> { defm rr : AVX512_maskable, AVX5128IBase, Sched<[sched]>; let mayStore = 1, hasSideEffects = 0 in def mr : AVX5128I, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded]>; def mrk : AVX5128I, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded]>; } multiclass compress_by_vec_width_lowering { def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask, (_.VT _.RC:$src)), (!cast(Name#_.ZSuffix##mrk) addr:$dst, _.KRCWM:$mask, _.RC:$src)>; } multiclass compress_by_elt_width opc, string OpcodeStr, X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo, Predicate Pred = HasAVX512> { let Predicates = [Pred] in defm Z : compress_by_vec_width_common, compress_by_vec_width_lowering, EVEX_V512; let Predicates = [Pred, HasVLX] in { defm Z256 : compress_by_vec_width_common, compress_by_vec_width_lowering, EVEX_V256; defm Z128 : compress_by_vec_width_common, compress_by_vec_width_lowering, EVEX_V128; } } // FIXME: Is there a better scheduler class for VPCOMPRESS? defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256, avx512vl_i32_info>, EVEX; defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256, avx512vl_i64_info>, EVEX, VEX_W; defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256, avx512vl_f32_info>, EVEX; defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256, avx512vl_f64_info>, EVEX, VEX_W; // expand multiclass expand_by_vec_width opc, X86VectorVTInfo _, string OpcodeStr, X86FoldableSchedWrite sched> { defm rr : AVX512_maskable, AVX5128IBase, Sched<[sched]>; defm rm : AVX512_maskable, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded, ReadAfterLd]>; } multiclass expand_by_vec_width_lowering { def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)), (!cast(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$src)>; def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)), (!cast(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$src)>; def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, (_.VT _.RC:$src0))), (!cast(Name#_.ZSuffix##rmk) _.RC:$src0, _.KRCWM:$mask, addr:$src)>; } multiclass expand_by_elt_width opc, string OpcodeStr, X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo, Predicate Pred = HasAVX512> { let Predicates = [Pred] in defm Z : expand_by_vec_width, expand_by_vec_width_lowering, EVEX_V512; let Predicates = [Pred, HasVLX] in { defm Z256 : expand_by_vec_width, expand_by_vec_width_lowering, EVEX_V256; defm Z128 : expand_by_vec_width, expand_by_vec_width_lowering, EVEX_V128; } } // FIXME: Is there a better scheduler class for VPEXPAND? defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256, avx512vl_i32_info>, EVEX; defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256, avx512vl_i64_info>, EVEX, VEX_W; defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256, avx512vl_f32_info>, EVEX; defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256, avx512vl_f64_info>, EVEX, VEX_W; //handle instruction reg_vec1 = op(reg_vec,imm) // op(mem_vec,imm) // op(broadcast(eltVt),imm) //all instruction created with FROUND_CURRENT multiclass avx512_unary_fp_packed_imm opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable, Sched<[sched]>; defm rmi : AVX512_maskable, Sched<[sched.Folded, ReadAfterLd]>; defm rmbi : AVX512_maskable, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } } //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} multiclass avx512_unary_fp_sae_packed_imm opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm rrib : AVX512_maskable, EVEX_B, Sched<[sched]>; } multiclass avx512_common_unary_fp_sae_packed_imm opc, SDNode OpNode, SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{ let Predicates = [prd] in { defm Z : avx512_unary_fp_packed_imm, avx512_unary_fp_sae_packed_imm, EVEX_V512; } let Predicates = [prd, HasVLX] in { defm Z128 : avx512_unary_fp_packed_imm, EVEX_V128; defm Z256 : avx512_unary_fp_packed_imm, EVEX_V256; } } //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) // op(reg_vec2,mem_vec,imm) // op(reg_vec2,broadcast(eltVt),imm) //all instruction created with FROUND_CURRENT multiclass avx512_fp_packed_imm opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _>{ let ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable, Sched<[sched]>; defm rmi : AVX512_maskable, Sched<[sched.Folded, ReadAfterLd]>; defm rmbi : AVX512_maskable, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } } //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) // op(reg_vec2,mem_vec,imm) multiclass avx512_3Op_rm_imm8 opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo>{ let ExeDomain = DestInfo.ExeDomain in { defm rri : AVX512_maskable, Sched<[sched]>; defm rmi : AVX512_maskable, Sched<[sched.Folded, ReadAfterLd]>; } } //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) // op(reg_vec2,mem_vec,imm) // op(reg_vec2,broadcast(eltVt),imm) multiclass avx512_3Op_imm8 opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _>: avx512_3Op_rm_imm8{ let ExeDomain = _.ExeDomain in defm rmbi : AVX512_maskable, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) // op(reg_vec2,mem_scalar,imm) multiclass avx512_fp_scalar_imm opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable_scalar, Sched<[sched]>; defm rmi : AVX512_maskable_scalar, Sched<[sched.Folded, ReadAfterLd]>; } } //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} multiclass avx512_fp_sae_packed_imm opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm rrib : AVX512_maskable, EVEX_B, Sched<[sched]>; } //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} multiclass avx512_fp_sae_scalar_imm opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm NAME#rrib : AVX512_maskable_scalar, EVEX_B, Sched<[sched]>; } multiclass avx512_common_fp_sae_packed_imm opc, SDNode OpNode, SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{ let Predicates = [prd] in { defm Z : avx512_fp_packed_imm, avx512_fp_sae_packed_imm, EVEX_V512; } let Predicates = [prd, HasVLX] in { defm Z128 : avx512_fp_packed_imm, EVEX_V128; defm Z256 : avx512_fp_packed_imm, EVEX_V256; } } multiclass avx512_common_3Op_rm_imm8 opc, SDNode OpNode, string OpStr, X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> { let Predicates = [Pred] in { defm Z : avx512_3Op_rm_imm8, EVEX_V512, AVX512AIi8Base, EVEX_4V; } let Predicates = [Pred, HasVLX] in { defm Z128 : avx512_3Op_rm_imm8, EVEX_V128, AVX512AIi8Base, EVEX_4V; defm Z256 : avx512_3Op_rm_imm8, EVEX_V256, AVX512AIi8Base, EVEX_4V; } } multiclass avx512_common_3Op_imm8 opc, SDNode OpNode, X86SchedWriteWidths sched, Predicate Pred = HasAVX512> { let Predicates = [Pred] in { defm Z : avx512_3Op_imm8, EVEX_V512; } let Predicates = [Pred, HasVLX] in { defm Z128 : avx512_3Op_imm8, EVEX_V128; defm Z256 : avx512_3Op_imm8, EVEX_V256; } } multiclass avx512_common_fp_sae_scalar_imm opc, SDNode OpNode, SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd> { let Predicates = [prd] in { defm Z128 : avx512_fp_scalar_imm, avx512_fp_sae_scalar_imm; } } multiclass avx512_common_unary_fp_sae_packed_imm_all opcPs, bits<8> opcPd, SDNode OpNode, SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{ defm PS : avx512_common_unary_fp_sae_packed_imm, EVEX_CD8<32, CD8VF>; defm PD : avx512_common_unary_fp_sae_packed_imm, EVEX_CD8<64, CD8VF>, VEX_W; } defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56, X86VReduce, X86VReduceRnd, SchedWriteFRnd, HasDQI>, AVX512AIi8Base, EVEX; defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09, X86VRndScale, X86VRndScaleRnd, SchedWriteFRnd, HasAVX512>, AVX512AIi8Base, EVEX; defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26, X86VGetMant, X86VGetMantRnd, SchedWriteFRnd, HasAVX512>, AVX512AIi8Base, EVEX; defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info, 0x50, X86VRange, X86VRangeRnd, SchedWriteFAdd, HasDQI>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info, 0x50, X86VRange, X86VRangeRnd, SchedWriteFAdd, HasDQI>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd", f64x_info, 0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info, 0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info, 0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info, 0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info, 0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info, 0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; let Predicates = [HasAVX512] in { def : Pat<(v16f32 (ffloor VR512:$src)), (VRNDSCALEPSZrri VR512:$src, (i32 0x9))>; def : Pat<(v16f32 (fnearbyint VR512:$src)), (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>; def : Pat<(v16f32 (fceil VR512:$src)), (VRNDSCALEPSZrri VR512:$src, (i32 0xA))>; def : Pat<(v16f32 (frint VR512:$src)), (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>; def : Pat<(v16f32 (ftrunc VR512:$src)), (VRNDSCALEPSZrri VR512:$src, (i32 0xB))>; def : Pat<(v8f64 (ffloor VR512:$src)), (VRNDSCALEPDZrri VR512:$src, (i32 0x9))>; def : Pat<(v8f64 (fnearbyint VR512:$src)), (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>; def : Pat<(v8f64 (fceil VR512:$src)), (VRNDSCALEPDZrri VR512:$src, (i32 0xA))>; def : Pat<(v8f64 (frint VR512:$src)), (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>; def : Pat<(v8f64 (ftrunc VR512:$src)), (VRNDSCALEPDZrri VR512:$src, (i32 0xB))>; } let Predicates = [HasVLX] in { def : Pat<(v4f32 (ffloor VR128X:$src)), (VRNDSCALEPSZ128rri VR128X:$src, (i32 0x9))>; def : Pat<(v4f32 (fnearbyint VR128X:$src)), (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xC))>; def : Pat<(v4f32 (fceil VR128X:$src)), (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xA))>; def : Pat<(v4f32 (frint VR128X:$src)), (VRNDSCALEPSZ128rri VR128X:$src, (i32 0x4))>; def : Pat<(v4f32 (ftrunc VR128X:$src)), (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xB))>; def : Pat<(v2f64 (ffloor VR128X:$src)), (VRNDSCALEPDZ128rri VR128X:$src, (i32 0x9))>; def : Pat<(v2f64 (fnearbyint VR128X:$src)), (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xC))>; def : Pat<(v2f64 (fceil VR128X:$src)), (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xA))>; def : Pat<(v2f64 (frint VR128X:$src)), (VRNDSCALEPDZ128rri VR128X:$src, (i32 0x4))>; def : Pat<(v2f64 (ftrunc VR128X:$src)), (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xB))>; def : Pat<(v8f32 (ffloor VR256X:$src)), (VRNDSCALEPSZ256rri VR256X:$src, (i32 0x9))>; def : Pat<(v8f32 (fnearbyint VR256X:$src)), (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xC))>; def : Pat<(v8f32 (fceil VR256X:$src)), (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xA))>; def : Pat<(v8f32 (frint VR256X:$src)), (VRNDSCALEPSZ256rri VR256X:$src, (i32 0x4))>; def : Pat<(v8f32 (ftrunc VR256X:$src)), (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xB))>; def : Pat<(v4f64 (ffloor VR256X:$src)), (VRNDSCALEPDZ256rri VR256X:$src, (i32 0x9))>; def : Pat<(v4f64 (fnearbyint VR256X:$src)), (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xC))>; def : Pat<(v4f64 (fceil VR256X:$src)), (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xA))>; def : Pat<(v4f64 (frint VR256X:$src)), (VRNDSCALEPDZ256rri VR256X:$src, (i32 0x4))>; def : Pat<(v4f64 (ftrunc VR256X:$src)), (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xB))>; } multiclass avx512_shuff_packed_128_common opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _, X86VectorVTInfo CastInfo> { let ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable, Sched<[sched]>; defm rmi : AVX512_maskable, Sched<[sched.Folded, ReadAfterLd]>; defm rmbi : AVX512_maskable, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_shuff_packed_128 opc>{ let Predicates = [HasAVX512] in defm Z : avx512_shuff_packed_128_common, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in defm Z256 : avx512_shuff_packed_128_common, EVEX_V256; } defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256, avx512vl_f32_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256, avx512vl_f64_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256, avx512vl_i32_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256, avx512vl_i64_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; let Predicates = [HasAVX512] in { // Provide fallback in case the load node that is used in the broadcast // patterns above is used by additional users, which prevents the pattern // selection. def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))), (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 0)>; def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))), (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 0)>; def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))), (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 0)>; def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))), (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 0)>; def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))), (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 0)>; def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))), (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 0)>; } multiclass avx512_valign { defm NAME: avx512_common_3Op_imm8, AVX512AIi8Base, EVEX_4V; } defm VALIGND: avx512_valign<"valignd", SchedWriteShuffle, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; defm VALIGNQ: avx512_valign<"valignq", SchedWriteShuffle, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W; defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr", SchedWriteShuffle, avx512vl_i8_info, avx512vl_i8_info>, EVEX_CD8<8, CD8VF>; // Fragments to help convert valignq into masked valignd. Or valignq/valignd // into vpalignr. def ValignqImm32XForm : SDNodeXFormgetZExtValue() * 2, SDLoc(N)); }]>; def ValignqImm8XForm : SDNodeXFormgetZExtValue() * 8, SDLoc(N)); }]>; def ValigndImm8XForm : SDNodeXFormgetZExtValue() * 4, SDLoc(N)); }]>; multiclass avx512_vpalign_mask_lowering { def : Pat<(To.VT (vselect To.KRCWM:$mask, (bitconvert (From.VT (OpNode From.RC:$src1, From.RC:$src2, imm:$src3))), To.RC:$src0)), (!cast(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, To.RC:$src2, (ImmXForm imm:$src3))>; def : Pat<(To.VT (vselect To.KRCWM:$mask, (bitconvert (From.VT (OpNode From.RC:$src1, From.RC:$src2, imm:$src3))), To.ImmAllZerosV)), (!cast(OpcodeStr#"rrikz") To.KRCWM:$mask, To.RC:$src1, To.RC:$src2, (ImmXForm imm:$src3))>; def : Pat<(To.VT (vselect To.KRCWM:$mask, (bitconvert (From.VT (OpNode From.RC:$src1, (bitconvert (To.LdFrag addr:$src2)), imm:$src3))), To.RC:$src0)), (!cast(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, addr:$src2, (ImmXForm imm:$src3))>; def : Pat<(To.VT (vselect To.KRCWM:$mask, (bitconvert (From.VT (OpNode From.RC:$src1, (bitconvert (To.LdFrag addr:$src2)), imm:$src3))), To.ImmAllZerosV)), (!cast(OpcodeStr#"rmikz") To.KRCWM:$mask, To.RC:$src1, addr:$src2, (ImmXForm imm:$src3))>; } multiclass avx512_vpalign_mask_lowering_mb : avx512_vpalign_mask_lowering { def : Pat<(From.VT (OpNode From.RC:$src1, (bitconvert (To.VT (X86VBroadcast (To.ScalarLdFrag addr:$src2)))), imm:$src3)), (!cast(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2, (ImmXForm imm:$src3))>; def : Pat<(To.VT (vselect To.KRCWM:$mask, (bitconvert (From.VT (OpNode From.RC:$src1, (bitconvert (To.VT (X86VBroadcast (To.ScalarLdFrag addr:$src2)))), imm:$src3))), To.RC:$src0)), (!cast(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, addr:$src2, (ImmXForm imm:$src3))>; def : Pat<(To.VT (vselect To.KRCWM:$mask, (bitconvert (From.VT (OpNode From.RC:$src1, (bitconvert (To.VT (X86VBroadcast (To.ScalarLdFrag addr:$src2)))), imm:$src3))), To.ImmAllZerosV)), (!cast(OpcodeStr#"rmbikz") To.KRCWM:$mask, To.RC:$src1, addr:$src2, (ImmXForm imm:$src3))>; } let Predicates = [HasAVX512] in { // For 512-bit we lower to the widest element type we can. So we only need // to handle converting valignq to valignd. defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info, v16i32_info, ValignqImm32XForm>; } let Predicates = [HasVLX] in { // For 128-bit we lower to the widest element type we can. So we only need // to handle converting valignq to valignd. defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info, v4i32x_info, ValignqImm32XForm>; // For 256-bit we lower to the widest element type we can. So we only need // to handle converting valignq to valignd. defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info, v8i32x_info, ValignqImm32XForm>; } let Predicates = [HasVLX, HasBWI] in { // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR. defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info, v16i8x_info, ValignqImm8XForm>; defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info, v16i8x_info, ValigndImm8XForm>; } defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw", SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>, EVEX_CD8<8, CD8VF>; multiclass avx512_unary_rm opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rr : AVX512_maskable, EVEX, AVX5128IBase, Sched<[sched]>; defm rm : AVX512_maskable, EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded]>; } } multiclass avx512_unary_rmb opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> : avx512_unary_rm { defm rmb : AVX512_maskable, EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded]>; } multiclass avx512_unary_rm_vl opc, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo, Predicate prd> { let Predicates = [prd] in defm Z : avx512_unary_rm, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_unary_rm, EVEX_V256; defm Z128 : avx512_unary_rm, EVEX_V128; } } multiclass avx512_unary_rmb_vl opc, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo, Predicate prd> { let Predicates = [prd] in defm Z : avx512_unary_rmb, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_unary_rmb, EVEX_V256; defm Z128 : avx512_unary_rmb, EVEX_V128; } } multiclass avx512_unary_rm_vl_dq opc_d, bits<8> opc_q, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched, Predicate prd> { defm Q : avx512_unary_rmb_vl, VEX_W; defm D : avx512_unary_rmb_vl; } multiclass avx512_unary_rm_vl_bw opc_b, bits<8> opc_w, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched, Predicate prd> { defm W : avx512_unary_rm_vl, VEX_WIG; defm B : avx512_unary_rm_vl, VEX_WIG; } multiclass avx512_unary_rm_vl_all opc_b, bits<8> opc_w, bits<8> opc_d, bits<8> opc_q, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched> { defm NAME : avx512_unary_rm_vl_dq, avx512_unary_rm_vl_bw; } defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs, SchedWriteVecALU>; // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX. let Predicates = [HasAVX512, NoVLX] in { def : Pat<(v4i64 (abs VR256X:$src)), (EXTRACT_SUBREG (VPABSQZrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)), sub_ymm)>; def : Pat<(v2i64 (abs VR128X:$src)), (EXTRACT_SUBREG (VPABSQZrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)), sub_xmm)>; } // Use 512bit version to implement 128/256 bit. multiclass avx512_unary_lowering { let Predicates = [prd, NoVLX] in { def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)), (EXTRACT_SUBREG (!cast(InstrStr # "Zrr") (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)), _.info256.RC:$src1, _.info256.SubRegIdx)), _.info256.SubRegIdx)>; def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)), (EXTRACT_SUBREG (!cast(InstrStr # "Zrr") (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)), _.info128.RC:$src1, _.info128.SubRegIdx)), _.info128.SubRegIdx)>; } } defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz, SchedWriteVecIMul, HasCDI>; // FIXME: Is there a better scheduler class for VPCONFLICT? defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, SchedWriteVecALU, HasCDI>; // VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX. defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>; defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>; //===---------------------------------------------------------------------===// // Counts number of ones - VPOPCNTD and VPOPCNTQ //===---------------------------------------------------------------------===// // FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ? defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop, SchedWriteVecALU, HasVPOPCNTDQ>; defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>; defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>; //===---------------------------------------------------------------------===// // Replicate Single FP - MOVSHDUP and MOVSLDUP //===---------------------------------------------------------------------===// multiclass avx512_replicate opc, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched> { defm NAME: avx512_unary_rm_vl, XS; } defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup, SchedWriteFShuffle>; defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup, SchedWriteFShuffle>; //===----------------------------------------------------------------------===// // AVX-512 - MOVDDUP //===----------------------------------------------------------------------===// multiclass avx512_movddup_128 opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rr : AVX512_maskable, EVEX, Sched<[sched]>; defm rm : AVX512_maskable, EVEX, EVEX_CD8<_.EltSize, CD8VH>, Sched<[sched.Folded]>; } } multiclass avx512_movddup_common opc, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> { defm Z : avx512_unary_rm, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { defm Z256 : avx512_unary_rm, EVEX_V256; defm Z128 : avx512_movddup_128, EVEX_V128; } } multiclass avx512_movddup opc, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched> { defm NAME: avx512_movddup_common, XD, VEX_W; } defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>; let Predicates = [HasVLX] in { def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))), (VMOVDDUPZ128rm addr:$src)>; def : Pat<(v2f64 (X86VBroadcast f64:$src)), (VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>; def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))), (VMOVDDUPZ128rm addr:$src)>; def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)), (v2f64 VR128X:$src0)), (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask, (COPY_TO_REGCLASS FR64X:$src, VR128X))>; def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)), (bitconvert (v4i32 immAllZerosV))), (VMOVDDUPZ128rrkz VK2WM:$mask, (COPY_TO_REGCLASS FR64X:$src, VR128X))>; def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))), (v2f64 VR128X:$src0)), (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))), (bitconvert (v4i32 immAllZerosV))), (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>; def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))), (v2f64 VR128X:$src0)), (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))), (bitconvert (v4i32 immAllZerosV))), (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>; } //===----------------------------------------------------------------------===// // AVX-512 - Unpack Instructions //===----------------------------------------------------------------------===// defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512, SchedWriteFShuffleSizes>; defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512, SchedWriteFShuffleSizes>; defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl, SchedWriteShuffle, HasBWI>; defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh, SchedWriteShuffle, HasBWI>; defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl, SchedWriteShuffle, HasBWI>; defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh, SchedWriteShuffle, HasBWI>; defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl, SchedWriteShuffle, HasAVX512>; defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh, SchedWriteShuffle, HasAVX512>; defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl, SchedWriteShuffle, HasAVX512>; defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh, SchedWriteShuffle, HasAVX512>; //===----------------------------------------------------------------------===// // AVX-512 - Extract & Insert Integer Instructions //===----------------------------------------------------------------------===// multiclass avx512_extract_elt_bw_m opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { def mr : AVX512Ii8, EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>; } multiclass avx512_extract_elt_b { let Predicates = [HasBWI] in { def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst), (ins _.RC:$src1, u8imm:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32orGR64:$dst, (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>, EVEX, TAPD, Sched<[WriteVecExtract]>; defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD; } } multiclass avx512_extract_elt_w { let Predicates = [HasBWI] in { def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst), (ins _.RC:$src1, u8imm:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32orGR64:$dst, (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>, EVEX, PD, Sched<[WriteVecExtract]>; let hasSideEffects = 0 in def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst), (ins _.RC:$src1, u8imm:$src2), OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX, TAPD, FoldGenData, Sched<[WriteVecExtract]>; defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD; } } multiclass avx512_extract_elt_dq { let Predicates = [HasDQI] in { def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst), (ins _.RC:$src1, u8imm:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GRC:$dst, (extractelt (_.VT _.RC:$src1), imm:$src2))]>, EVEX, TAPD, Sched<[WriteVecExtract]>; def mr : AVX512Ii8<0x16, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(store (extractelt (_.VT _.RC:$src1), imm:$src2),addr:$dst)]>, EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD, Sched<[WriteVecExtractSt]>; } } defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG; defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG; defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>; defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W; multiclass avx512_insert_elt_m opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, PatFrag LdFrag> { def rm : AVX512Ii8, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsertLd, ReadAfterLd]>; } multiclass avx512_insert_elt_bw opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, PatFrag LdFrag> { let Predicates = [HasBWI] in { def rr : AVX512Ii8, EVEX_4V, Sched<[WriteVecInsert]>; defm NAME : avx512_insert_elt_m; } } multiclass avx512_insert_elt_dq opc, string OpcodeStr, X86VectorVTInfo _, RegisterClass GRC> { let Predicates = [HasDQI] in { def rr : AVX512Ii8, EVEX_4V, TAPD, Sched<[WriteVecInsert]>; defm NAME : avx512_insert_elt_m, TAPD; } } defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info, extloadi8>, TAPD, VEX_WIG; defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info, extloadi16>, PD, VEX_WIG; defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>; defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W; //===----------------------------------------------------------------------===// // VSHUFPS - VSHUFPD Operations //===----------------------------------------------------------------------===// multiclass avx512_shufp{ defm NAME: avx512_common_3Op_imm8, EVEX_CD8, AVX512AIi8Base, EVEX_4V; } defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS; defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W; //===----------------------------------------------------------------------===// // AVX-512 - Byte shift Left/Right //===----------------------------------------------------------------------===// // FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well? multiclass avx512_shift_packed opc, SDNode OpNode, Format MRMr, Format MRMm, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _>{ def rr : AVX512, Sched<[sched]>; def rm : AVX512, Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_shift_packed_all opc, SDNode OpNode, Format MRMr, Format MRMm, string OpcodeStr, X86SchedWriteWidths sched, Predicate prd>{ let Predicates = [prd] in defm Z : avx512_shift_packed, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_shift_packed, EVEX_V256; defm Z128 : avx512_shift_packed, EVEX_V128; } } defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq", SchedWriteShuffle, HasBWI>, AVX512PDIi8Base, EVEX_4V, VEX_WIG; defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq", SchedWriteShuffle, HasBWI>, AVX512PDIi8Base, EVEX_4V, VEX_WIG; multiclass avx512_psadbw_packed opc, SDNode OpNode, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _dst, X86VectorVTInfo _src> { def rr : AVX512BI, Sched<[sched]>; def rm : AVX512BI, Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_psadbw_packed_all opc, SDNode OpNode, string OpcodeStr, X86SchedWriteWidths sched, Predicate prd> { let Predicates = [prd] in defm Z : avx512_psadbw_packed, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_psadbw_packed, EVEX_V256; defm Z128 : avx512_psadbw_packed, EVEX_V128; } } defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw", SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG; // Transforms to swizzle an immediate to enable better matching when // memory operand isn't in the right place. def VPTERNLOG321_imm8 : SDNodeXFormgetZExtValue(); // Swap bits 1/4 and 3/6. uint8_t NewImm = Imm & 0xa5; if (Imm & 0x02) NewImm |= 0x10; if (Imm & 0x10) NewImm |= 0x02; if (Imm & 0x08) NewImm |= 0x40; if (Imm & 0x40) NewImm |= 0x08; return getI8Imm(NewImm, SDLoc(N)); }]>; def VPTERNLOG213_imm8 : SDNodeXFormgetZExtValue(); // Swap bits 2/4 and 3/5. uint8_t NewImm = Imm & 0xc3; if (Imm & 0x04) NewImm |= 0x10; if (Imm & 0x10) NewImm |= 0x04; if (Imm & 0x08) NewImm |= 0x20; if (Imm & 0x20) NewImm |= 0x08; return getI8Imm(NewImm, SDLoc(N)); }]>; def VPTERNLOG132_imm8 : SDNodeXFormgetZExtValue(); // Swap bits 1/2 and 5/6. uint8_t NewImm = Imm & 0x99; if (Imm & 0x02) NewImm |= 0x04; if (Imm & 0x04) NewImm |= 0x02; if (Imm & 0x20) NewImm |= 0x40; if (Imm & 0x40) NewImm |= 0x20; return getI8Imm(NewImm, SDLoc(N)); }]>; def VPTERNLOG231_imm8 : SDNodeXFormgetZExtValue(); // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5 uint8_t NewImm = Imm & 0x81; if (Imm & 0x02) NewImm |= 0x04; if (Imm & 0x04) NewImm |= 0x10; if (Imm & 0x08) NewImm |= 0x40; if (Imm & 0x10) NewImm |= 0x02; if (Imm & 0x20) NewImm |= 0x08; if (Imm & 0x40) NewImm |= 0x20; return getI8Imm(NewImm, SDLoc(N)); }]>; def VPTERNLOG312_imm8 : SDNodeXFormgetZExtValue(); // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3 uint8_t NewImm = Imm & 0x81; if (Imm & 0x02) NewImm |= 0x10; if (Imm & 0x04) NewImm |= 0x02; if (Imm & 0x08) NewImm |= 0x20; if (Imm & 0x10) NewImm |= 0x04; if (Imm & 0x20) NewImm |= 0x40; if (Imm & 0x40) NewImm |= 0x08; return getI8Imm(NewImm, SDLoc(N)); }]>; multiclass avx512_ternlog opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, string Name>{ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable_3src, AVX512AIi8Base, EVEX_4V, Sched<[sched]>; defm rmi : AVX512_maskable_3src, AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, ReadAfterLd]>; defm rmbi : AVX512_maskable_3src, EVEX_B, AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, ReadAfterLd]>; }// Constraints = "$src1 = $dst" // Additional patterns for matching passthru operand in other positions. def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>; // Additional patterns for matching loads in other positions. def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)), _.RC:$src2, _.RC:$src1, (i8 imm:$src4))), (!cast(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; def : Pat<(_.VT (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), _.RC:$src2, (i8 imm:$src4))), (!cast(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; // Additional patterns for matching zero masking with loads in other // positions. def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src3)), _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), _.ImmAllZerosV)), (!cast(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), _.RC:$src2, (i8 imm:$src4)), _.ImmAllZerosV)), (!cast(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; // Additional patterns for matching masked loads with different // operand orders. def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), _.RC:$src2, (i8 imm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src3)), _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src2, _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)), _.RC:$src1, (i8 imm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src3)), _.RC:$src1, _.RC:$src2, (i8 imm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>; // Additional patterns for matching broadcasts in other positions. def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), _.RC:$src2, _.RC:$src1, (i8 imm:$src4))), (!cast(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; def : Pat<(_.VT (OpNode _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src3)), _.RC:$src2, (i8 imm:$src4))), (!cast(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; // Additional patterns for matching zero masking with broadcasts in other // positions. def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), _.ImmAllZerosV)), (!cast(Name#_.ZSuffix#rmbikz) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src3)), _.RC:$src2, (i8 imm:$src4)), _.ImmAllZerosV)), (!cast(Name#_.ZSuffix#rmbikz) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; // Additional patterns for matching masked broadcasts with different // operand orders. def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src3)), _.RC:$src2, (i8 imm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src2, _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src3)), (i8 imm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src2, (X86VBroadcast (_.ScalarLdFrag addr:$src3)), _.RC:$src1, (i8 imm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), _.RC:$src1, _.RC:$src2, (i8 imm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>; } multiclass avx512_common_ternlog { let Predicates = [HasAVX512] in defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM, _.info512, NAME>, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM, _.info128, NAME>, EVEX_V128; defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM, _.info256, NAME>, EVEX_V256; } } defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU, avx512vl_i32_info>; defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU, avx512vl_i64_info>, VEX_W; // Patterns to implement vnot using vpternlog instead of creating all ones // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen // so that the result is only dependent on src0. But we use the same source // for all operands to prevent a false dependency. // TODO: We should maybe have a more generalized algorithm for folding to // vpternlog. let Predicates = [HasAVX512] in { def : Pat<(v8i64 (xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV)))), (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; } let Predicates = [HasAVX512, NoVLX] in { def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))), (EXTRACT_SUBREG (VPTERNLOGQZrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), (i8 15)), sub_xmm)>; def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))), (EXTRACT_SUBREG (VPTERNLOGQZrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), (i8 15)), sub_ymm)>; } let Predicates = [HasVLX] in { def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))), (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))), (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; } //===----------------------------------------------------------------------===// // AVX-512 - FixupImm //===----------------------------------------------------------------------===// multiclass avx512_fixupimm_packed opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _>{ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable_3src, Sched<[sched]>; defm rmi : AVX512_maskable_3src, Sched<[sched.Folded, ReadAfterLd]>; defm rmbi : AVX512_maskable_3src, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } // Constraints = "$src1 = $dst" } multiclass avx512_fixupimm_packed_sae opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _>{ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { defm rrib : AVX512_maskable_3src, EVEX_B, Sched<[sched]>; } } multiclass avx512_fixupimm_scalar opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, X86VectorVTInfo _src3VT> { let Constraints = "$src1 = $dst" , Predicates = [HasAVX512], ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable_3src_scalar, Sched<[sched]>; defm rrib : AVX512_maskable_3src_scalar, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; defm rmi : AVX512_maskable_3src_scalar, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_fixupimm_packed_all { let Predicates = [HasAVX512] in defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.ZMM, _Vec.info512>, avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, sched.ZMM, _Vec.info512>, AVX512AIi8Base, EVEX_4V, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.XMM, _Vec.info128>, AVX512AIi8Base, EVEX_4V, EVEX_V128; defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.YMM, _Vec.info256>, AVX512AIi8Base, EVEX_4V, EVEX_V256; } } defm VFIXUPIMMSS : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar, SchedWriteFAdd.Scl, f32x_info, v4i32x_info>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; defm VFIXUPIMMSD : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar, SchedWriteFAdd.Scl, f64x_info, v2i64x_info>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; defm VFIXUPIMMPS : avx512_fixupimm_packed_all, EVEX_CD8<32, CD8VF>; defm VFIXUPIMMPD : avx512_fixupimm_packed_all, EVEX_CD8<64, CD8VF>, VEX_W; // Patterns used to select SSE scalar fp arithmetic instructions from // either: // // (1) a scalar fp operation followed by a blend // // The effect is that the backend no longer emits unnecessary vector // insert instructions immediately after SSE scalar fp instructions // like addss or mulss. // // For example, given the following code: // __m128 foo(__m128 A, __m128 B) { // A[0] += B[0]; // return A; // } // // Previously we generated: // addss %xmm0, %xmm1 // movss %xmm1, %xmm0 // // We now generate: // addss %xmm1, %xmm0 // // (2) a vector packed single/double fp operation followed by a vector insert // // The effect is that the backend converts the packed fp instruction // followed by a vector insert into a single SSE scalar fp instruction. // // For example, given the following code: // __m128 foo(__m128 A, __m128 B) { // __m128 C = A + B; // return (__m128) {c[0], a[1], a[2], a[3]}; // } // // Previously we generated: // addps %xmm0, %xmm1 // movss %xmm1, %xmm0 // // We now generate: // addss %xmm1, %xmm0 // TODO: Some canonicalization in lowering would simplify the number of // patterns we have to try to match. multiclass AVX512_scalar_math_fp_patterns { let Predicates = [HasAVX512] in { // extracted scalar math op with insert via movss def : Pat<(_.VT (MoveNode (_.VT VR128X:$dst), (_.VT (scalar_to_vector (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))), _.FRC:$src))))), (!cast("V"#OpcPrefix#Zrr_Int) _.VT:$dst, (COPY_TO_REGCLASS _.FRC:$src, VR128X))>; // vector math op with insert via movss def : Pat<(_.VT (MoveNode (_.VT VR128X:$dst), (Op (_.VT VR128X:$dst), (_.VT VR128X:$src)))), (!cast("V"#OpcPrefix#Zrr_Int) _.VT:$dst, _.VT:$src)>; // extracted masked scalar math op with insert via movss def : Pat<(MoveNode (_.VT VR128X:$src1), (scalar_to_vector (X86selects VK1WM:$mask, (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), _.FRC:$src2), _.FRC:$src0))), (!cast("V"#OpcPrefix#Zrr_Intk) (COPY_TO_REGCLASS _.FRC:$src0, VR128X), VK1WM:$mask, _.VT:$src1, (COPY_TO_REGCLASS _.FRC:$src2, VR128X))>; // extracted masked scalar math op with insert via movss def : Pat<(MoveNode (_.VT VR128X:$src1), (scalar_to_vector (X86selects VK1WM:$mask, (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), _.FRC:$src2), (_.EltVT ZeroFP)))), (!cast("V"#OpcPrefix#Zrr_Intkz) VK1WM:$mask, _.VT:$src1, (COPY_TO_REGCLASS _.FRC:$src2, VR128X))>; } } defm : AVX512_scalar_math_fp_patterns; defm : AVX512_scalar_math_fp_patterns; defm : AVX512_scalar_math_fp_patterns; defm : AVX512_scalar_math_fp_patterns; defm : AVX512_scalar_math_fp_patterns; defm : AVX512_scalar_math_fp_patterns; defm : AVX512_scalar_math_fp_patterns; defm : AVX512_scalar_math_fp_patterns; //===----------------------------------------------------------------------===// // AES instructions //===----------------------------------------------------------------------===// multiclass avx512_vaes Op, string OpStr, string IntPrefix> { let Predicates = [HasVLX, HasVAES] in { defm Z128 : AESI_binop_rm_int(IntPrefix), loadv2i64, 0, VR128X, i128mem>, EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG; defm Z256 : AESI_binop_rm_int(IntPrefix##"_256"), loadv4i64, 0, VR256X, i256mem>, EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG; } let Predicates = [HasAVX512, HasVAES] in defm Z : AESI_binop_rm_int(IntPrefix##"_512"), loadv8i64, 0, VR512, i512mem>, EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG; } defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">; defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">; defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">; defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">; //===----------------------------------------------------------------------===// // PCLMUL instructions - Carry less multiplication //===----------------------------------------------------------------------===// let Predicates = [HasAVX512, HasVPCLMULQDQ] in defm VPCLMULQDQZ : vpclmulqdq, EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG; let Predicates = [HasVLX, HasVPCLMULQDQ] in { defm VPCLMULQDQZ128 : vpclmulqdq, EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG; defm VPCLMULQDQZ256: vpclmulqdq, EVEX_4V, EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_WIG; } // Aliases defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>; defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>; defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>; //===----------------------------------------------------------------------===// // VBMI2 //===----------------------------------------------------------------------===// multiclass VBMI2_shift_var_rm Op, string OpStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { let Constraints = "$src1 = $dst", ExeDomain = VTI.ExeDomain in { defm r: AVX512_maskable_3src, AVX512FMA3Base, Sched<[sched]>; defm m: AVX512_maskable_3src, AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass VBMI2_shift_var_rmb Op, string OpStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo VTI> : VBMI2_shift_var_rm { let Constraints = "$src1 = $dst", ExeDomain = VTI.ExeDomain in defm mb: AVX512_maskable_3src, AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } multiclass VBMI2_shift_var_rm_common Op, string OpStr, SDNode OpNode, X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> { let Predicates = [HasVBMI2] in defm Z : VBMI2_shift_var_rm, EVEX_V512; let Predicates = [HasVBMI2, HasVLX] in { defm Z256 : VBMI2_shift_var_rm, EVEX_V256; defm Z128 : VBMI2_shift_var_rm, EVEX_V128; } } multiclass VBMI2_shift_var_rmb_common Op, string OpStr, SDNode OpNode, X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> { let Predicates = [HasVBMI2] in defm Z : VBMI2_shift_var_rmb, EVEX_V512; let Predicates = [HasVBMI2, HasVLX] in { defm Z256 : VBMI2_shift_var_rmb, EVEX_V256; defm Z128 : VBMI2_shift_var_rmb, EVEX_V128; } } multiclass VBMI2_shift_var wOp, bits<8> dqOp, string Prefix, SDNode OpNode, X86SchedWriteWidths sched> { defm W : VBMI2_shift_var_rm_common, VEX_W, EVEX_CD8<16, CD8VF>; defm D : VBMI2_shift_var_rmb_common, EVEX_CD8<32, CD8VF>; defm Q : VBMI2_shift_var_rmb_common, VEX_W, EVEX_CD8<64, CD8VF>; } multiclass VBMI2_shift_imm wOp, bits<8> dqOp, string Prefix, SDNode OpNode, X86SchedWriteWidths sched> { defm W : avx512_common_3Op_rm_imm8, VEX_W, EVEX_CD8<16, CD8VF>; defm D : avx512_common_3Op_imm8, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; defm Q : avx512_common_3Op_imm8, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; } // Concat & Shift defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>; defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>; defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>; defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>; // Compress defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256, avx512vl_i8_info, HasVBMI2>, EVEX; defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256, avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W; // Expand defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256, avx512vl_i8_info, HasVBMI2>, EVEX; defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256, avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W; //===----------------------------------------------------------------------===// // VNNI //===----------------------------------------------------------------------===// let Constraints = "$src1 = $dst" in multiclass VNNI_rmb Op, string OpStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { defm r : AVX512_maskable_3src, EVEX_4V, T8PD, Sched<[sched]>; defm m : AVX512_maskable_3src, EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD, Sched<[sched.Folded, ReadAfterLd]>; defm mb : AVX512_maskable_3src, EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B, T8PD, Sched<[sched.Folded, ReadAfterLd]>; } multiclass VNNI_common Op, string OpStr, SDNode OpNode, X86SchedWriteWidths sched> { let Predicates = [HasVNNI] in defm Z : VNNI_rmb, EVEX_V512; let Predicates = [HasVNNI, HasVLX] in { defm Z256 : VNNI_rmb, EVEX_V256; defm Z128 : VNNI_rmb, EVEX_V128; } } // FIXME: Is there a better scheduler class for VPDP? defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul>; defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul>; defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul>; defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul>; //===----------------------------------------------------------------------===// // Bit Algorithms //===----------------------------------------------------------------------===// // FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW? defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU, avx512vl_i8_info, HasBITALG>; defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU, avx512vl_i16_info, HasBITALG>, VEX_W; defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>; defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>; multiclass VPSHUFBITQMB_rm { defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst), (ins VTI.RC:$src1, VTI.RC:$src2), "vpshufbitqmb", "$src2, $src1", "$src1, $src2", (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD, Sched<[sched]>; defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst), (ins VTI.RC:$src1, VTI.MemOp:$src2), "vpshufbitqmb", "$src2, $src1", "$src1, $src2", (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1), (VTI.VT (bitconvert (VTI.LdFrag addr:$src2))))>, EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD, Sched<[sched.Folded, ReadAfterLd]>; } multiclass VPSHUFBITQMB_common { let Predicates = [HasBITALG] in defm Z : VPSHUFBITQMB_rm, EVEX_V512; let Predicates = [HasBITALG, HasVLX] in { defm Z256 : VPSHUFBITQMB_rm, EVEX_V256; defm Z128 : VPSHUFBITQMB_rm, EVEX_V128; } } // FIXME: Is there a better scheduler class for VPSHUFBITQMB? defm VPSHUFBITQMB : VPSHUFBITQMB_common; //===----------------------------------------------------------------------===// // GFNI //===----------------------------------------------------------------------===// multiclass GF2P8MULB_avx512_common Op, string OpStr, SDNode OpNode, X86SchedWriteWidths sched> { let Predicates = [HasGFNI, HasAVX512, HasBWI] in defm Z : avx512_binop_rm, EVEX_V512; let Predicates = [HasGFNI, HasVLX, HasBWI] in { defm Z256 : avx512_binop_rm, EVEX_V256; defm Z128 : avx512_binop_rm, EVEX_V128; } } defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb, SchedWriteVecALU>, EVEX_CD8<8, CD8VF>, T8PD; multiclass GF2P8AFFINE_avx512_rmb_imm Op, string OpStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo VTI, X86VectorVTInfo BcstVTI> : avx512_3Op_rm_imm8 { let ExeDomain = VTI.ExeDomain in defm rmbi : AVX512_maskable, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } multiclass GF2P8AFFINE_avx512_common Op, string OpStr, SDNode OpNode, X86SchedWriteWidths sched> { let Predicates = [HasGFNI, HasAVX512, HasBWI] in defm Z : GF2P8AFFINE_avx512_rmb_imm, EVEX_V512; let Predicates = [HasGFNI, HasVLX, HasBWI] in { defm Z256 : GF2P8AFFINE_avx512_rmb_imm, EVEX_V256; defm Z128 : GF2P8AFFINE_avx512_rmb_imm, EVEX_V128; } } defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb", X86GF2P8affineinvqb, SchedWriteVecIMul>, EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base; defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb", X86GF2P8affineqb, SchedWriteVecIMul>, EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base; //===----------------------------------------------------------------------===// // AVX5124FMAPS //===----------------------------------------------------------------------===// let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle, Constraints = "$src1 = $dst" in { defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info, (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), "v4fmaddps", "$src3, $src2", "$src2, $src3", []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>; defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info, (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), "v4fnmaddps", "$src3, $src2", "$src2, $src3", []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>; defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info, (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3), "v4fmaddss", "$src3, $src2", "$src2, $src3", []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>; defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info, (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3), "v4fnmaddss", "$src3, $src2", "$src2, $src3", []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>; } //===----------------------------------------------------------------------===// // AVX5124VNNIW //===----------------------------------------------------------------------===// let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt, Constraints = "$src1 = $dst" in { defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info, (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), "vp4dpwssd", "$src3, $src2", "$src2, $src3", []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>; defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info, (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), "vp4dpwssds", "$src3, $src2", "$src2, $src3", []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>; }