diff options
author | Martyn Capewell <martyn.capewell@arm.com> | 2020-12-02 17:59:09 +0000 |
---|---|---|
committer | Martyn Capewell <martyn.capewell@arm.com> | 2021-01-08 11:19:31 +0000 |
commit | 2e66dc71cadca95042b675dc72c5b6ed00e18562 (patch) | |
tree | 4a5df7aef2935437e9214b16b0be7b1e000b5bd2 | |
parent | ba9a148b62237919c4853db5477ca22540691848 (diff) |
[sve2] Implement indexed multiply-accumulate long
Implement indexed multiply-accumulate and subtract instructions for signed and
unsigned inputs.
Change-Id: Ifd16fdf1b2cde4aae11343dfa377400f1ab7fc35
-rw-r--r-- | src/aarch64/assembler-aarch64.h | 41 | ||||
-rw-r--r-- | src/aarch64/assembler-sve-aarch64.cc | 232 | ||||
-rw-r--r-- | src/aarch64/disasm-aarch64.cc | 32 | ||||
-rw-r--r-- | src/aarch64/instructions-aarch64.cc | 25 | ||||
-rw-r--r-- | src/aarch64/instructions-aarch64.h | 1 | ||||
-rw-r--r-- | src/aarch64/macro-assembler-aarch64.h | 86 | ||||
-rw-r--r-- | src/aarch64/macro-assembler-sve-aarch64.cc | 10 | ||||
-rw-r--r-- | src/aarch64/simulator-aarch64.cc | 201 | ||||
-rw-r--r-- | src/aarch64/simulator-aarch64.h | 1 | ||||
-rw-r--r-- | test/aarch64/test-disasm-sve-aarch64.cc | 87 | ||||
-rw-r--r-- | test/aarch64/test-simulator-sve2-aarch64.cc | 144 |
11 files changed, 460 insertions, 400 deletions
diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h index e665cbfd..43865312 100644 --- a/src/aarch64/assembler-aarch64.h +++ b/src/aarch64/assembler-aarch64.h @@ -6150,25 +6150,37 @@ class Assembler : public vixl::internal::AssemblerBase { const ZRegister& zm); // Signed multiply-add long to accumulator (bottom, indexed). - void smlalb(const ZRegister& zda, const ZRegister& zn); + void smlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); // Signed multiply-add long to accumulator (bottom). void smlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); // Signed multiply-add long to accumulator (top, indexed). - void smlalt(const ZRegister& zda, const ZRegister& zn); + void smlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); // Signed multiply-add long to accumulator (top). void smlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); // Signed multiply-subtract long from accumulator (bottom, indexed). - void smlslb(const ZRegister& zda, const ZRegister& zn); + void smlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); // Signed multiply-subtract long from accumulator (bottom). void smlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); // Signed multiply-subtract long from accumulator (top, indexed). - void smlslt(const ZRegister& zda, const ZRegister& zn); + void smlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); // Signed multiply-subtract long from accumulator (top). void smlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); @@ -6560,25 +6572,37 @@ class Assembler : public vixl::internal::AssemblerBase { const ZRegister& zm); // Unsigned multiply-add long to accumulator (bottom, indexed). 
- void umlalb(const ZRegister& zda, const ZRegister& zn); + void umlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); // Unsigned multiply-add long to accumulator (bottom). void umlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); // Unsigned multiply-add long to accumulator (top, indexed). - void umlalt(const ZRegister& zda, const ZRegister& zn); + void umlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); // Unsigned multiply-add long to accumulator (top). void umlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); // Unsigned multiply-subtract long from accumulator (bottom, indexed). - void umlslb(const ZRegister& zda, const ZRegister& zn); + void umlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); // Unsigned multiply-subtract long from accumulator (bottom). void umlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); // Unsigned multiply-subtract long from accumulator (top, indexed). - void umlslt(const ZRegister& zda, const ZRegister& zn); + void umlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); // Unsigned multiply-subtract long from accumulator (top). 
void umlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); @@ -7695,6 +7719,7 @@ class Assembler : public vixl::internal::AssemblerBase { Instr op_s, Instr op_d); + Instr SVEMulLongIndexHelper(const ZRegister& zm, int index); void SVEContiguousPrefetchScalarPlusScalarHelper(PrefetchOperation prfop, const PRegister& pg, diff --git a/src/aarch64/assembler-sve-aarch64.cc b/src/aarch64/assembler-sve-aarch64.cc index 9af19463..6d9b7987 100644 --- a/src/aarch64/assembler-sve-aarch64.cc +++ b/src/aarch64/assembler-sve-aarch64.cc @@ -1560,15 +1560,15 @@ Instr Assembler::SVEMulIndexHelper(unsigned lane_size_in_bytes_log2, op = op_h; break; case kSRegSizeInBytesLog2: - VIXL_ASSERT(zm.GetCode() <= 7); - VIXL_ASSERT(IsUint2(index)); + VIXL_CHECK(zm.GetCode() <= 7); + VIXL_CHECK(IsUint2(index)); // Top two bits of "zm" encode the index. zm_with_index |= (index & 3) << (Rm_offset + 3); op = op_s; break; case kDRegSizeInBytesLog2: - VIXL_ASSERT(zm.GetCode() <= 15); - VIXL_ASSERT(IsUint1(index)); + VIXL_CHECK(zm.GetCode() <= 15); + VIXL_CHECK(IsUint1(index)); // Top bit of "zm" encodes the index. zm_with_index |= (index & 1) << (Rm_offset + 4); op = op_d; @@ -1579,6 +1579,28 @@ Instr Assembler::SVEMulIndexHelper(unsigned lane_size_in_bytes_log2, return op | zm_with_index | size; } +Instr Assembler::SVEMulLongIndexHelper(const ZRegister& zm, int index) { + Instr imm_field; + Instr zm_id; + if (zm.IsLaneSizeH()) { + VIXL_CHECK(zm.GetCode() <= 7); + VIXL_CHECK(IsUint3(index)); + imm_field = ExtractUnsignedBitfield32(2, 1, index) << 19; + zm_id = Rx<18, 16>(zm); + } else { + VIXL_ASSERT(zm.IsLaneSizeS()); + VIXL_CHECK(zm.GetCode() <= 15); + VIXL_CHECK(IsUint2(index)); + imm_field = ExtractBit(index, 1) << 20; + zm_id = Rx<19, 16>(zm); + } + + // Synthesize the low part of immediate encoding. + imm_field |= ExtractBit(index, 0) << 11; + + return zm_id | imm_field; +} + // SVEFPMulAddIndex. 
void Assembler::fmla(const ZRegister& zda, @@ -7746,19 +7768,32 @@ void Assembler::sminp(const ZRegister& zd, Emit(0x4416a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); } -// This prototype maps to 2 instruction encodings: -// smlalb_z_zzzi_d -// smlalb_z_zzzi_s -void Assembler::smlalb(const ZRegister& zda, const ZRegister& zn) { - // SMLALB <Zda>.D, <Zn>.S, <Zm>.S[<imm>] - // 0100 0100 111. .... 1000 .0.. .... .... - // size<23:22> | opc<20:16> | S<13> | U<12> | il<11> | T<10> | Zn<9:5> | - // Zda<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); - - Emit(0x44e08000 | Rd(zda) | Rn(zn)); -} +#define VIXL_SVE_MULL_INDEX_LIST(V) \ + V(smlalb, 0x44a08000) \ + V(smlalt, 0x44a08400) \ + V(smlslb, 0x44a0a000) \ + V(smlslt, 0x44a0a400) \ + V(umlalb, 0x44a09000) \ + V(umlalt, 0x44a09400) \ + V(umlslb, 0x44a0b000) \ + V(umlslt, 0x44a0b400) \ + V(sqdmullb, 0x44a0e000) \ + V(sqdmullt, 0x44a0e400) + +#define VIXL_DEFINE_ASM_FUNC(MNE, OP) \ + void Assembler::MNE(const ZRegister& zda, \ + const ZRegister& zn, \ + const ZRegister& zm, \ + int index) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); \ + VIXL_ASSERT(AreSameLaneSize(zn, zm)); \ + VIXL_ASSERT(zda.IsLaneSizeD() || zda.IsLaneSizeS()); \ + VIXL_ASSERT(zda.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2); \ + Instr zm_with_index = SVEMulLongIndexHelper(zm, index); \ + Emit(OP | SVESize(zda) | Rd(zda) | Rn(zn) | zm_with_index); \ + } +VIXL_SVE_MULL_INDEX_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC void Assembler::smlalb(const ZRegister& zda, const ZRegister& zn, @@ -7775,20 +7810,6 @@ void Assembler::smlalb(const ZRegister& zda, Emit(0x44004000 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); } -// This prototype maps to 2 instruction encodings: -// smlalt_z_zzzi_d -// smlalt_z_zzzi_s -void Assembler::smlalt(const ZRegister& zda, const ZRegister& zn) { - // SMLALT <Zda>.D, <Zn>.S, <Zm>.S[<imm>] - // 0100 0100 111. .... 1000 .1.. .... ....
- // size<23:22> | opc<20:16> | S<13> | U<12> | il<11> | T<10> | Zn<9:5> | - // Zda<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); - - Emit(0x44e08400 | Rd(zda) | Rn(zn)); -} - void Assembler::smlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) { @@ -7804,20 +7825,6 @@ void Assembler::smlalt(const ZRegister& zda, Emit(0x44004400 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); } -// This prototype maps to 2 instruction encodings: -// smlslb_z_zzzi_d -// smlslb_z_zzzi_s -void Assembler::smlslb(const ZRegister& zda, const ZRegister& zn) { - // SMLSLB <Zda>.D, <Zn>.S, <Zm>.S[<imm>] - // 0100 0100 111. .... 1010 .0.. .... .... - // size<23:22> | opc<20:16> | S<13> | U<12> | il<11> | T<10> | Zn<9:5> | - // Zda<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); - - Emit(0x44e0a000 | Rd(zda) | Rn(zn)); -} - void Assembler::smlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) { @@ -7833,20 +7840,6 @@ void Assembler::smlslb(const ZRegister& zda, Emit(0x44005000 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); } -// This prototype maps to 2 instruction encodings: -// smlslt_z_zzzi_d -// smlslt_z_zzzi_s -void Assembler::smlslt(const ZRegister& zda, const ZRegister& zn) { - // SMLSLT <Zda>.D, <Zn>.S, <Zm>.S[<imm>] - // 0100 0100 111. .... 1010 .1.. .... .... - // size<23:22> | opc<20:16> | S<13> | U<12> | il<11> | T<10> | Zn<9:5> | - // Zda<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); - - Emit(0x44e0a400 | Rd(zda) | Rn(zn)); -} - void Assembler::smlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) { @@ -8229,37 +8222,6 @@ void Assembler::sqdmulh(const ZRegister& zd, void Assembler::sqdmullb(const ZRegister& zd, const ZRegister& zn, - const ZRegister& zm, - int index) { - // SQDMULLB <Zd>.D, <Zn>.S, <Zm>.S[<imm>] - // 0100 0100 111. .... 1110 .0.. .... .... 
- // size<23:22> | opc<20:16> | il<11> | T<10> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); - VIXL_ASSERT(AreSameLaneSize(zn, zm)); - VIXL_ASSERT(zd.IsLaneSizeD() || zd.IsLaneSizeS()); - - Instr imm_field; - Instr zm_id; - if (zd.IsLaneSizeS()) { - VIXL_ASSERT(IsUint7(index)); - imm_field = ExtractUnsignedBitfield32(2, 1, index) << 19; - zm_id = Rx<18, 16>(zm); - } else { - VIXL_ASSERT(zd.IsLaneSizeD()); - VIXL_ASSERT(IsUint3(index)); - imm_field = ExtractBit(index, 1) << 20; - zm_id = Rx<19, 16>(zm); - } - - // Synthesize the low part of immediate encoding. - imm_field |= ExtractBit(index, 0) << 11; - - Emit(0x44a0e000 | SVESize(zd) | Rd(zd) | Rn(zn) | zm_id | imm_field); -} - -void Assembler::sqdmullb(const ZRegister& zd, - const ZRegister& zn, const ZRegister& zm) { // SQDMULLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb> // 0100 0101 ..0. .... 0110 00.. .... .... @@ -8273,40 +8235,6 @@ void Assembler::sqdmullb(const ZRegister& zd, Emit(0x45006000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); } -// This prototype maps to 2 instruction encodings: -// sqdmullt_z_zzi_d -// sqdmullt_z_zzi_s -void Assembler::sqdmullt(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm, - int index) { - // SQDMULLT <Zd>.D, <Zn>.S, <Zm>.S[<imm>] - // 0100 0100 111. .... 1110 .1.. .... .... - // size<23:22> | opc<20:16> | il<11> | T<10> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); - VIXL_ASSERT(AreSameLaneSize(zn, zm)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2); - - Instr imm_field; - Instr zm_id; - if (zd.IsLaneSizeS()) { - VIXL_ASSERT(IsUint7(index)); - imm_field = ExtractUnsignedBitfield32(2, 1, index) << 19; - zm_id = Rx<18, 16>(zm); - } else { - VIXL_ASSERT(zd.IsLaneSizeD()); - VIXL_ASSERT(IsUint3(index)); - imm_field = ExtractBit(index, 1) << 20; - zm_id = Rx<19, 16>(zm); - } - - // Synthesize the low part of immediate encoding. 
- imm_field |= ExtractBit(index, 0) << 11; - - Emit(0x44a0e400 | SVESize(zd) | Rd(zd) | Rn(zn) | zm_id | imm_field); -} - void Assembler::sqdmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { @@ -9241,20 +9169,6 @@ void Assembler::uminp(const ZRegister& zd, Emit(0x4417a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); } -// This prototype maps to 2 instruction encodings: -// umlalb_z_zzzi_d -// umlalb_z_zzzi_s -void Assembler::umlalb(const ZRegister& zda, const ZRegister& zn) { - // UMLALB <Zda>.D, <Zn>.S, <Zm>.S[<imm>] - // 0100 0100 111. .... 1001 .0.. .... .... - // size<23:22> | opc<20:16> | S<13> | U<12> | il<11> | T<10> | Zn<9:5> | - // Zda<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); - - Emit(0x44e09000 | Rd(zda) | Rn(zn)); -} - void Assembler::umlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) { @@ -9270,20 +9184,6 @@ void Assembler::umlalb(const ZRegister& zda, Emit(0x44004800 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); } -// This prototype maps to 2 instruction encodings: -// umlalt_z_zzzi_d -// umlalt_z_zzzi_s -void Assembler::umlalt(const ZRegister& zda, const ZRegister& zn) { - // UMLALT <Zda>.D, <Zn>.S, <Zm>.S[<imm>] - // 0100 0100 111. .... 1001 .1.. .... .... - // size<23:22> | opc<20:16> | S<13> | U<12> | il<11> | T<10> | Zn<9:5> | - // Zda<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); - - Emit(0x44e09400 | Rd(zda) | Rn(zn)); -} - void Assembler::umlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) { @@ -9299,20 +9199,6 @@ void Assembler::umlalt(const ZRegister& zda, Emit(0x44004c00 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); } -// This prototype maps to 2 instruction encodings: -// umlslb_z_zzzi_d -// umlslb_z_zzzi_s -void Assembler::umlslb(const ZRegister& zda, const ZRegister& zn) { - // UMLSLB <Zda>.D, <Zn>.S, <Zm>.S[<imm>] - // 0100 0100 111. .... 1011 .0.. .... .... 
- // size<23:22> | opc<20:16> | S<13> | U<12> | il<11> | T<10> | Zn<9:5> | - // Zda<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); - - Emit(0x44e0b000 | Rd(zda) | Rn(zn)); -} - void Assembler::umlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) { @@ -9328,20 +9214,6 @@ void Assembler::umlslb(const ZRegister& zda, Emit(0x44005800 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); } -// This prototype maps to 2 instruction encodings: -// umlslt_z_zzzi_d -// umlslt_z_zzzi_s -void Assembler::umlslt(const ZRegister& zda, const ZRegister& zn) { - // UMLSLT <Zda>.D, <Zn>.S, <Zm>.S[<imm>] - // 0100 0100 111. .... 1011 .1.. .... .... - // size<23:22> | opc<20:16> | S<13> | U<12> | il<11> | T<10> | Zn<9:5> | - // Zda<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); - - Emit(0x44e0b400 | Rd(zda) | Rn(zn)); -} - void Assembler::umlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm) { diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc index 3ba3aafc..b3f38997 100644 --- a/src/aarch64/disasm-aarch64.cc +++ b/src/aarch64/disasm-aarch64.cc @@ -145,17 +145,17 @@ Disassembler::FormToVisitorFnMap Disassembler::form_to_visitor_ = { {"smaxp_z_p_zz", &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, {"sminp_z_p_zz", &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, {"smlalb_z_zzz", &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, - {"smlalb_z_zzzi_d", &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm}, - {"smlalb_z_zzzi_s", &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm}, + {"smlalb_z_zzzi_d", &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"smlalb_z_zzzi_s", &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, {"smlalt_z_zzz", &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, - {"smlalt_z_zzzi_d", &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm}, - {"smlalt_z_zzzi_s", &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm}, + {"smlalt_z_zzzi_d", &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"smlalt_z_zzzi_s", &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, {"smlslb_z_zzz", 
&Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, - {"smlslb_z_zzzi_d", &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm}, - {"smlslb_z_zzzi_s", &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm}, + {"smlslb_z_zzzi_d", &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"smlslb_z_zzzi_s", &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, {"smlslt_z_zzz", &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, - {"smlslt_z_zzzi_d", &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm}, - {"smlslt_z_zzzi_s", &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm}, + {"smlslt_z_zzzi_d", &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"smlslt_z_zzzi_s", &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, {"smulh_z_zz", &Disassembler::Disassemble_ZdT_ZnT_ZmT}, {"smullb_z_zz", &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, {"smullb_z_zzi_d", &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, @@ -270,17 +270,17 @@ Disassembler::FormToVisitorFnMap Disassembler::form_to_visitor_ = { {"umaxp_z_p_zz", &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, {"uminp_z_p_zz", &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, {"umlalb_z_zzz", &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, - {"umlalb_z_zzzi_d", &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm}, - {"umlalb_z_zzzi_s", &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm}, + {"umlalb_z_zzzi_d", &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"umlalb_z_zzzi_s", &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, {"umlalt_z_zzz", &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, - {"umlalt_z_zzzi_d", &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm}, - {"umlalt_z_zzzi_s", &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm}, + {"umlalt_z_zzzi_d", &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"umlalt_z_zzzi_s", &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, {"umlslb_z_zzz", &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, - {"umlslb_z_zzzi_d", &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm}, - {"umlslb_z_zzzi_s", &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm}, + {"umlslb_z_zzzi_d", &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"umlslb_z_zzzi_s", 
&Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, {"umlslt_z_zzz", &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, - {"umlslt_z_zzzi_d", &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm}, - {"umlslt_z_zzzi_s", &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm}, + {"umlslt_z_zzzi_d", &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"umlslt_z_zzzi_s", &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, {"umulh_z_zz", &Disassembler::Disassemble_ZdT_ZnT_ZmT}, {"umullb_z_zz", &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, {"umullb_z_zzi_d", &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, diff --git a/src/aarch64/instructions-aarch64.cc b/src/aarch64/instructions-aarch64.cc index b918cf3f..0f134600 100644 --- a/src/aarch64/instructions-aarch64.cc +++ b/src/aarch64/instructions-aarch64.cc @@ -638,6 +638,31 @@ std::pair<int, int> Instruction::GetSVEMulZmAndIndex() const { return std::make_pair(reg_code, index); } +// Get the register and index for SVE indexed long multiplies encoded in the +// forms: +// .h : Zm = <18:16>, index = <20:19><11> +// .s : Zm = <19:16>, index = <20><11> +std::pair<int, int> Instruction::GetSVEMulLongZmAndIndex() const { + int reg_code = GetRmLow16(); + int index = ExtractBit(11); + + // For long multiplies, the SVE size field <23:22> encodes the destination + // element size. The source element size is half the width. + switch (GetSVEVectorFormat()) { + case kFormatVnS: + reg_code &= 7; + index |= ExtractBits(20, 19) << 1; + break; + case kFormatVnD: + index |= ExtractBit(20) << 1; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + return std::make_pair(reg_code, index); +} + // Logical immediates can't encode zero, so a return value of zero is used to // indicate a failure case. Specifically, where the constraints on imm_s are // not met. 
diff --git a/src/aarch64/instructions-aarch64.h b/src/aarch64/instructions-aarch64.h index 318eac54..3ef94a4c 100644 --- a/src/aarch64/instructions-aarch64.h +++ b/src/aarch64/instructions-aarch64.h @@ -350,6 +350,7 @@ class Instruction { std::pair<int, int> GetSVEPermuteIndexAndLaneSizeLog2() const; std::pair<int, int> GetSVEMulZmAndIndex() const; + std::pair<int, int> GetSVEMulLongZmAndIndex() const; std::pair<int, int> GetSVEImmShiftAndLaneSizeLog2(bool is_predicated) const; diff --git a/src/aarch64/macro-assembler-aarch64.h b/src/aarch64/macro-assembler-aarch64.h index 7e088eeb..be89dbac 100644 --- a/src/aarch64/macro-assembler-aarch64.h +++ b/src/aarch64/macro-assembler-aarch64.h @@ -6760,38 +6760,38 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { const PRegisterM& pg, const ZRegister& zn, const ZRegister& zm); - void Smlalb(const ZRegister& zda, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - smlalb(zda, zn); - } + void Smlalb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); void Smlalb(const ZRegister& zd, const ZRegister& za, const ZRegister& zn, const ZRegister& zm); - void Smlalt(const ZRegister& zda, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - smlalt(zda, zn); - } + void Smlalt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); void Smlalt(const ZRegister& zd, const ZRegister& za, const ZRegister& zn, const ZRegister& zm); - void Smlslb(const ZRegister& zda, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - smlslb(zda, zn); - } + void Smlslb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); void Smlslb(const ZRegister& zd, const ZRegister& za, const ZRegister& zn, const ZRegister& zm); - 
void Smlslt(const ZRegister& zda, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - smlslt(zda, zn); - } + void Smlslt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); void Smlslt(const ZRegister& zd, const ZRegister& za, const ZRegister& zn, @@ -7199,39 +7199,39 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { const PRegisterM& pg, const ZRegister& zn, const ZRegister& zm); - void Umlalb(const ZRegister& zda, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - umlalb(zda, zn); - } + void Umlalb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); void Umlalb(const ZRegister& zd, const ZRegister& za, const ZRegister& zn, const ZRegister& zm); - void Umlalt(const ZRegister& zda, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - umlalt(zda, zn); - } - void Umlalt(const ZRegister& zda, + void Umlalt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Umlalt(const ZRegister& zd, const ZRegister& za, const ZRegister& zn, const ZRegister& zm); - void Umlslb(const ZRegister& zda, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - umlslb(zda, zn); - } - void Umlslb(const ZRegister& zda, + void Umlslb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Umlslb(const ZRegister& zd, const ZRegister& za, const ZRegister& zn, const ZRegister& zm); - void Umlslt(const ZRegister& zda, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - umlslt(zda, zn); - } - void Umlslt(const ZRegister& zda, + void Umlslt(const ZRegister& zd, + const ZRegister& za, + const 
ZRegister& zn, + const ZRegister& zm, + int index); + void Umlslt(const ZRegister& zd, const ZRegister& za, const ZRegister& zn, const ZRegister& zm); diff --git a/src/aarch64/macro-assembler-sve-aarch64.cc b/src/aarch64/macro-assembler-sve-aarch64.cc index 171e5809..b03b8fae 100644 --- a/src/aarch64/macro-assembler-sve-aarch64.cc +++ b/src/aarch64/macro-assembler-sve-aarch64.cc @@ -1862,7 +1862,15 @@ VIXL_SVE_4REG_LIST(VIXL_DEFINE_MASM_FUNC) V(Sqdmlalb, sqdmlalb, FourRegOneImmDestructiveHelper) \ V(Sqdmlalt, sqdmlalt, FourRegOneImmDestructiveHelper) \ V(Sqdmlslb, sqdmlslb, FourRegOneImmDestructiveHelper) \ - V(Sqdmlslt, sqdmlslt, FourRegOneImmDestructiveHelper) + V(Sqdmlslt, sqdmlslt, FourRegOneImmDestructiveHelper) \ + V(Smlalb, smlalb, FourRegOneImmDestructiveHelper) \ + V(Smlalt, smlalt, FourRegOneImmDestructiveHelper) \ + V(Smlslb, smlslb, FourRegOneImmDestructiveHelper) \ + V(Smlslt, smlslt, FourRegOneImmDestructiveHelper) \ + V(Umlalb, umlalb, FourRegOneImmDestructiveHelper) \ + V(Umlalt, umlalt, FourRegOneImmDestructiveHelper) \ + V(Umlslb, umlslb, FourRegOneImmDestructiveHelper) \ + V(Umlslt, umlslt, FourRegOneImmDestructiveHelper) #define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN, HELPER) \ void MacroAssembler::MASMFN(const ZRegister& zd, \ diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc index 0f697084..c1753692 100644 --- a/src/aarch64/simulator-aarch64.cc +++ b/src/aarch64/simulator-aarch64.cc @@ -170,24 +170,24 @@ Simulator::FormToVisitorFnMap Simulator::form_to_visitor_ = { {"smaxp_z_p_zz", &Simulator::SimulateSVEIntArithPair}, {"sminp_z_p_zz", &Simulator::SimulateSVEIntArithPair}, {"smlalb_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb}, - {"smlalb_z_zzzi_d", &Simulator::Simulate_ZdaD_ZnS_ZmS_imm}, - {"smlalb_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnH_ZmH_imm}, + {"smlalb_z_zzzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"smlalb_z_zzzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx}, {"smlalt_z_zzz", 
&Simulator::Simulate_ZdaT_ZnTb_ZmTb}, - {"smlalt_z_zzzi_d", &Simulator::Simulate_ZdaD_ZnS_ZmS_imm}, - {"smlalt_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnH_ZmH_imm}, + {"smlalt_z_zzzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"smlalt_z_zzzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx}, {"smlslb_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb}, - {"smlslb_z_zzzi_d", &Simulator::Simulate_ZdaD_ZnS_ZmS_imm}, - {"smlslb_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnH_ZmH_imm}, + {"smlslb_z_zzzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"smlslb_z_zzzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx}, {"smlslt_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb}, - {"smlslt_z_zzzi_d", &Simulator::Simulate_ZdaD_ZnS_ZmS_imm}, - {"smlslt_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnH_ZmH_imm}, + {"smlslt_z_zzzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"smlslt_z_zzzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx}, {"smulh_z_zz", &Simulator::Simulate_ZdT_ZnT_ZmT}, {"smullb_z_zz", &Simulator::SimulateSVEIntMulLongVec}, {"smullb_z_zzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx}, - {"smullb_z_zzi_s", &Simulator::Simulate_ZdS_ZnH_ZmH_imm}, + {"smullb_z_zzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx}, {"smullt_z_zz", &Simulator::SimulateSVEIntMulLongVec}, {"smullt_z_zzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx}, - {"smullt_z_zzi_s", &Simulator::Simulate_ZdS_ZnH_ZmH_imm}, + {"smullt_z_zzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx}, {"splice_z_p_zz_con", &Simulator::VisitSVEVectorSplice}, {"sqabs_z_p_z", &Simulator::Simulate_ZdT_PgM_ZnT}, {"sqadd_z_p_zz", &Simulator::SimulateSVESaturatingArithmetic}, @@ -212,10 +212,10 @@ Simulator::FormToVisitorFnMap Simulator::form_to_visitor_ = { {"sqdmulh_z_zzi_s", &Simulator::Simulate_ZdS_ZnS_ZmS_imm}, {"sqdmullb_z_zz", &Simulator::SimulateSVEIntMulLongVec}, {"sqdmullb_z_zzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx}, - {"sqdmullb_z_zzi_s", 
&Simulator::Simulate_ZdS_ZnH_ZmH_imm}, + {"sqdmullb_z_zzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx}, {"sqdmullt_z_zz", &Simulator::SimulateSVEIntMulLongVec}, {"sqdmullt_z_zzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx}, - {"sqdmullt_z_zzi_s", &Simulator::Simulate_ZdS_ZnH_ZmH_imm}, + {"sqdmullt_z_zzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx}, {"sqneg_z_p_z", &Simulator::Simulate_ZdT_PgM_ZnT}, {"sqrdcmlah_z_zzz", &Simulator::SimulateSVEComplexIntMulAdd}, {"sqrdcmlah_z_zzzi_h", &Simulator::SimulateSVEComplexIntMulAdd}, @@ -295,24 +295,24 @@ Simulator::FormToVisitorFnMap Simulator::form_to_visitor_ = { {"umaxp_z_p_zz", &Simulator::SimulateSVEIntArithPair}, {"uminp_z_p_zz", &Simulator::SimulateSVEIntArithPair}, {"umlalb_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb}, - {"umlalb_z_zzzi_d", &Simulator::Simulate_ZdaD_ZnS_ZmS_imm}, - {"umlalb_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnH_ZmH_imm}, + {"umlalb_z_zzzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"umlalb_z_zzzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx}, {"umlalt_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb}, - {"umlalt_z_zzzi_d", &Simulator::Simulate_ZdaD_ZnS_ZmS_imm}, - {"umlalt_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnH_ZmH_imm}, + {"umlalt_z_zzzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"umlalt_z_zzzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx}, {"umlslb_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb}, - {"umlslb_z_zzzi_d", &Simulator::Simulate_ZdaD_ZnS_ZmS_imm}, - {"umlslb_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnH_ZmH_imm}, + {"umlslb_z_zzzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"umlslb_z_zzzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx}, {"umlslt_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb}, - {"umlslt_z_zzzi_d", &Simulator::Simulate_ZdaD_ZnS_ZmS_imm}, - {"umlslt_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnH_ZmH_imm}, + {"umlslt_z_zzzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx}, + {"umlslt_z_zzzi_s", 
&Simulator::SimulateSVESaturatingIntMulLongIdx}, {"umulh_z_zz", &Simulator::Simulate_ZdT_ZnT_ZmT}, {"umullb_z_zz", &Simulator::SimulateSVEIntMulLongVec}, {"umullb_z_zzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx}, - {"umullb_z_zzi_s", &Simulator::Simulate_ZdS_ZnH_ZmH_imm}, + {"umullb_z_zzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx}, {"umullt_z_zz", &Simulator::SimulateSVEIntMulLongVec}, {"umullt_z_zzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx}, - {"umullt_z_zzi_s", &Simulator::Simulate_ZdS_ZnH_ZmH_imm}, + {"umullt_z_zzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx}, {"uqadd_z_p_zz", &Simulator::SimulateSVESaturatingArithmetic}, {"uqrshl_z_p_zz", &Simulator::VisitSVEBitwiseShiftByVector_Predicated}, {"uqrshlr_z_p_zz", &Simulator::VisitSVEBitwiseShiftByVector_Predicated}, @@ -2064,20 +2064,21 @@ void Simulator::Simulate_ZdD_ZnD_ZmD_imm(const Instruction* instr) { } void Simulator::SimulateSVESaturatingIntMulLongIdx(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister& zm = ReadVRegister(instr->ExtractBits(19, 16)); SimVRegister& zn = ReadVRegister(instr->GetRn()); SimVRegister temp, zm_idx, zn_b, zn_t; // Instead of calling the indexed form of the instruction logic, we call the - // vector form, which can reuse existing function logics without modification. + // vector form, which can reuse existing function logic without modification. // Select the specified elements based on the index input and than pack them // to the corresponding position. 
- Instr index = (instr->ExtractBit(20) << 1) | instr->ExtractBit(11); - dup_elements_to_segments(kFormatVnS, temp, zm, index); - pack_even_elements(kFormatVnS, zm_idx, temp); - pack_even_elements(kFormatVnS, zn_b, zn); - pack_odd_elements(kFormatVnS, zn_t, zn); + VectorFormat vform_half = VectorFormatHalfWidth(vform); + dup_elements_to_segments(vform_half, temp, instr->GetSVEMulLongZmAndIndex()); + pack_even_elements(vform_half, zm_idx, temp); + + pack_even_elements(vform_half, zn_b, zn); + pack_odd_elements(vform_half, zn_t, zn); switch (form_hash_) { case Hash("smullb_z_zzi_d"): @@ -2087,10 +2088,10 @@ void Simulator::SimulateSVESaturatingIntMulLongIdx(const Instruction* instr) { VIXL_UNIMPLEMENTED(); break; case Hash("sqdmullb_z_zzi_d"): - sqdmull(kFormatVnD, zd, zn_b, zm_idx); + sqdmull(vform, zd, zn_b, zm_idx); break; case Hash("sqdmullt_z_zzi_d"): - sqdmull(kFormatVnD, zd, zn_t, zm_idx); + sqdmull(vform, zd, zn_t, zm_idx); break; case Hash("umullb_z_zzi_d"): VIXL_UNIMPLEMENTED(); @@ -2098,6 +2099,56 @@ void Simulator::SimulateSVESaturatingIntMulLongIdx(const Instruction* instr) { case Hash("umullt_z_zzi_d"): VIXL_UNIMPLEMENTED(); break; + case Hash("smullb_z_zzi_s"): + VIXL_UNIMPLEMENTED(); + break; + case Hash("smullt_z_zzi_s"): + VIXL_UNIMPLEMENTED(); + break; + case Hash("sqdmullb_z_zzi_s"): + sqdmull(vform, zd, zn_b, zm_idx); + break; + case Hash("sqdmullt_z_zzi_s"): + sqdmull(vform, zd, zn_t, zm_idx); + break; + case Hash("umullb_z_zzi_s"): + VIXL_UNIMPLEMENTED(); + break; + case Hash("umullt_z_zzi_s"): + VIXL_UNIMPLEMENTED(); + break; + case Hash("smlalb_z_zzzi_s"): + case Hash("smlalb_z_zzzi_d"): + smlal(vform, zd, zn_b, zm_idx); + break; + case Hash("smlalt_z_zzzi_s"): + case Hash("smlalt_z_zzzi_d"): + smlal(vform, zd, zn_t, zm_idx); + break; + case Hash("smlslb_z_zzzi_s"): + case Hash("smlslb_z_zzzi_d"): + smlsl(vform, zd, zn_b, zm_idx); + break; + case Hash("smlslt_z_zzzi_s"): + case Hash("smlslt_z_zzzi_d"): + smlsl(vform, zd, zn_t, zm_idx); + 
break; + case Hash("umlalb_z_zzzi_s"): + case Hash("umlalb_z_zzzi_d"): + umlal(vform, zd, zn_b, zm_idx); + break; + case Hash("umlalt_z_zzzi_s"): + case Hash("umlalt_z_zzzi_d"): + umlal(vform, zd, zn_t, zm_idx); + break; + case Hash("umlslb_z_zzzi_s"): + case Hash("umlslb_z_zzzi_d"): + umlsl(vform, zd, zn_b, zm_idx); + break; + case Hash("umlslt_z_zzzi_s"): + case Hash("umlslt_z_zzzi_d"): + umlsl(vform, zd, zn_t, zm_idx); + break; default: VIXL_UNIMPLEMENTED(); } @@ -2205,46 +2256,6 @@ void Simulator::Simulate_ZdS_PgM_ZnS(const Instruction* instr) { mov_merging(vform, zd, pg, result); } -void Simulator::Simulate_ZdS_ZnH_ZmH_imm(const Instruction* instr) { - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister& zm = ReadVRegister(instr->ExtractBits(18, 16)); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - - SimVRegister temp, zm_idx, zn_b, zn_t; - // Instead of calling the indexed form of the instruction logic, we call the - // vector form, which can reuse existing function logics without modification. - // Select the specified elements based on the index input and than pack them - // to the corresponding position. 
- Instr index = (instr->ExtractBits(20, 19) << 1) | instr->ExtractBit(11); - dup_elements_to_segments(kFormatVnH, temp, zm, index); - pack_even_elements(kFormatVnH, zm_idx, temp); - pack_even_elements(kFormatVnH, zn_b, zn); - pack_odd_elements(kFormatVnH, zn_t, zn); - - switch (form_hash_) { - case Hash("smullb_z_zzi_s"): - VIXL_UNIMPLEMENTED(); - break; - case Hash("smullt_z_zzi_s"): - VIXL_UNIMPLEMENTED(); - break; - case Hash("sqdmullb_z_zzi_s"): - sqdmull(kFormatVnS, zd, zn_b, zm_idx); - break; - case Hash("sqdmullt_z_zzi_s"): - sqdmull(kFormatVnS, zd, zn_t, zm_idx); - break; - case Hash("umullb_z_zzi_s"): - VIXL_UNIMPLEMENTED(); - break; - case Hash("umullt_z_zzi_s"): - VIXL_UNIMPLEMENTED(); - break; - default: - VIXL_UNIMPLEMENTED(); - } -} - void Simulator::Simulate_ZdS_ZnS_ZmS_imm(const Instruction* instr) { SimVRegister& zd = ReadVRegister(instr->GetRd()); USE(zd); @@ -2776,18 +2787,6 @@ void Simulator::Simulate_ZdaD_ZnS_ZmS_imm(const Instruction* instr) { pack_odd_elements(kFormatVnS, zn_t, zn); switch (form_hash_) { - case Hash("smlalb_z_zzzi_d"): - VIXL_UNIMPLEMENTED(); - break; - case Hash("smlalt_z_zzzi_d"): - VIXL_UNIMPLEMENTED(); - break; - case Hash("smlslb_z_zzzi_d"): - VIXL_UNIMPLEMENTED(); - break; - case Hash("smlslt_z_zzzi_d"): - VIXL_UNIMPLEMENTED(); - break; case Hash("sqdmlalb_z_zzzi_d"): sqdmlal(kFormatVnD, zda, zn_b, zm_idx); break; @@ -2800,18 +2799,6 @@ void Simulator::Simulate_ZdaD_ZnS_ZmS_imm(const Instruction* instr) { case Hash("sqdmlslt_z_zzzi_d"): sqdmlsl(kFormatVnD, zda, zn_t, zm_idx); break; - case Hash("umlalb_z_zzzi_d"): - VIXL_UNIMPLEMENTED(); - break; - case Hash("umlalt_z_zzzi_d"): - VIXL_UNIMPLEMENTED(); - break; - case Hash("umlslb_z_zzzi_d"): - VIXL_UNIMPLEMENTED(); - break; - case Hash("umlslt_z_zzzi_d"): - VIXL_UNIMPLEMENTED(); - break; default: VIXL_UNIMPLEMENTED(); } @@ -2895,18 +2882,6 @@ void Simulator::Simulate_ZdaS_ZnH_ZmH_imm(const Instruction* instr) { case Hash("fmlslt_z_zzzi_s"): VIXL_UNIMPLEMENTED(); break; 
- case Hash("smlalb_z_zzzi_s"): - VIXL_UNIMPLEMENTED(); - break; - case Hash("smlalt_z_zzzi_s"): - VIXL_UNIMPLEMENTED(); - break; - case Hash("smlslb_z_zzzi_s"): - VIXL_UNIMPLEMENTED(); - break; - case Hash("smlslt_z_zzzi_s"): - VIXL_UNIMPLEMENTED(); - break; case Hash("sqdmlalb_z_zzzi_s"): sqdmlal(kFormatVnS, zda, zn_b, zm_idx); break; @@ -2919,18 +2894,6 @@ void Simulator::Simulate_ZdaS_ZnH_ZmH_imm(const Instruction* instr) { case Hash("sqdmlslt_z_zzzi_s"): sqdmlsl(kFormatVnS, zda, zn_t, zm_idx); break; - case Hash("umlalb_z_zzzi_s"): - VIXL_UNIMPLEMENTED(); - break; - case Hash("umlalt_z_zzzi_s"): - VIXL_UNIMPLEMENTED(); - break; - case Hash("umlslb_z_zzzi_s"): - VIXL_UNIMPLEMENTED(); - break; - case Hash("umlslt_z_zzzi_s"): - VIXL_UNIMPLEMENTED(); - break; default: VIXL_UNIMPLEMENTED(); } diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h index 3f5ee914..bac4863d 100644 --- a/src/aarch64/simulator-aarch64.h +++ b/src/aarch64/simulator-aarch64.h @@ -1178,7 +1178,6 @@ class Simulator : public DecoderVisitor { void Simulate_ZdS_PgM_ZnD(const Instruction* instr); void Simulate_ZdS_PgM_ZnH(const Instruction* instr); void Simulate_ZdS_PgM_ZnS(const Instruction* instr); - void Simulate_ZdS_ZnH_ZmH_imm(const Instruction* instr); void Simulate_ZdS_ZnS_ZmS_imm(const Instruction* instr); void Simulate_ZdT_PgM_ZnT(const Instruction* instr); void Simulate_ZdT_PgZ_ZnT_ZmT(const Instruction* instr); diff --git a/test/aarch64/test-disasm-sve-aarch64.cc b/test/aarch64/test-disasm-sve-aarch64.cc index 134ec52f..bc683624 100644 --- a/test/aarch64/test-disasm-sve-aarch64.cc +++ b/test/aarch64/test-disasm-sve-aarch64.cc @@ -8611,6 +8611,61 @@ TEST(sve2_floating_multiply_add_long_vector) { CLEANUP(); } +TEST(sve2_mla_long_index) { + SETUP(); + + COMPARE_MACRO(Smlalb(z11.VnD(), z11.VnD(), z29.VnS(), z0.VnS(), 3), + "smlalb z11.d, z29.s, z0.s[3]"); + COMPARE_MACRO(Smlalb(z18.VnS(), z18.VnS(), z17.VnH(), z0.VnH(), 7), + "smlalb z18.s, z17.h, z0.h[7]"); + 
COMPARE_MACRO(Smlalt(z10.VnD(), z10.VnD(), z30.VnS(), z15.VnS(), 0), + "smlalt z10.d, z30.s, z15.s[0]"); + COMPARE_MACRO(Smlalt(z23.VnS(), z23.VnS(), z31.VnH(), z7.VnH(), 0), + "smlalt z23.s, z31.h, z7.h[0]"); + COMPARE_MACRO(Smlslb(z12.VnD(), z12.VnD(), z23.VnS(), z3.VnS(), 1), + "smlslb z12.d, z23.s, z3.s[1]"); + COMPARE_MACRO(Smlslb(z5.VnS(), z5.VnS(), z4.VnH(), z4.VnH(), 2), + "smlslb z5.s, z4.h, z4.h[2]"); + COMPARE_MACRO(Smlslt(z7.VnD(), z7.VnD(), z9.VnS(), z6.VnS(), 3), + "smlslt z7.d, z9.s, z6.s[3]"); + COMPARE_MACRO(Smlslt(z9.VnS(), z9.VnS(), z21.VnH(), z3.VnH(), 4), + "smlslt z9.s, z21.h, z3.h[4]"); + COMPARE_MACRO(Umlalb(z9.VnD(), z9.VnD(), z1.VnS(), z11.VnS(), 0), + "umlalb z9.d, z1.s, z11.s[0]"); + COMPARE_MACRO(Umlalb(z9.VnS(), z9.VnS(), z5.VnH(), z1.VnH(), 6), + "umlalb z9.s, z5.h, z1.h[6]"); + COMPARE_MACRO(Umlalt(z6.VnD(), z6.VnD(), z17.VnS(), z14.VnS(), 1), + "umlalt z6.d, z17.s, z14.s[1]"); + COMPARE_MACRO(Umlalt(z9.VnS(), z9.VnS(), z11.VnH(), z3.VnH(), 7), + "umlalt z9.s, z11.h, z3.h[7]"); + COMPARE_MACRO(Umlslb(z12.VnD(), z12.VnD(), z15.VnS(), z9.VnS(), 2), + "umlslb z12.d, z15.s, z9.s[2]"); + COMPARE_MACRO(Umlslb(z14.VnS(), z14.VnS(), z10.VnH(), z2.VnH(), 0), + "umlslb z14.s, z10.h, z2.h[0]"); + COMPARE_MACRO(Umlslt(z12.VnD(), z12.VnD(), z28.VnS(), z8.VnS(), 3), + "umlslt z12.d, z28.s, z8.s[3]"); + COMPARE_MACRO(Umlslt(z24.VnS(), z24.VnS(), z12.VnH(), z6.VnH(), 1), + "umlslt z24.s, z12.h, z6.h[1]"); + + COMPARE_MACRO(Umlslt(z2.VnS(), z23.VnS(), z12.VnH(), z6.VnH(), 1), + "movprfx z2, z23\n" + "umlslt z2.s, z12.h, z6.h[1]"); + COMPARE_MACRO(Umlslt(z2.VnS(), z23.VnS(), z2.VnH(), z6.VnH(), 1), + "movprfx z31, z23\n" + "umlslt z31.s, z2.h, z6.h[1]\n" + "mov z2.d, z31.d"); + COMPARE_MACRO(Umlslt(z2.VnS(), z23.VnS(), z12.VnH(), z2.VnH(), 1), + "movprfx z31, z23\n" + "umlslt z31.s, z12.h, z2.h[1]\n" + "mov z2.d, z31.d"); + COMPARE_MACRO(Umlslt(z2.VnS(), z23.VnS(), z2.VnH(), z2.VnH(), 1), + "movprfx z31, z23\n" + "umlslt z31.s, z2.h, z2.h[1]\n" + "mov 
z2.d, z31.d"); + + CLEANUP(); +} + TEST(sve2_all_instructions) { // TODO: split these instructions into more logical groups. SETUP(); @@ -8681,22 +8736,6 @@ TEST(sve2_all_instructions) { // <Zm>.<Tb>"); // COMPARE_PREFIX(pmullt(z31.Vn?(), z30, z26), "pmullt <Zd>.<T>, <Zn>.<Tb>, // <Zm>.<Tb>"); - // COMPARE_PREFIX(smlalb(z11.VnD(), z29.VnS()), "smlalb z11.d, z29.s, - // <Zm>.s[<imm>]"); - // COMPARE_PREFIX(smlalb(z18.VnS(), z17.VnH()), "smlalb z18.d, z17.s, - // <Zm>.s[<imm>]"); - // COMPARE_PREFIX(smlalt(z10.VnS(), z30.VnH()), "smlalt z10.d, z30.s, - // <Zm>.s[<imm>]"); - // COMPARE_PREFIX(smlalt(z23.VnD(), z31.VnS()), "smlalt z23.d, z31.s, - // <Zm>.s[<imm>]"); - // COMPARE_PREFIX(smlslb(z12.VnD(), z23.VnS()), "smlslb z12.d, z23.s, - // <Zm>.s[<imm>]"); - // COMPARE_PREFIX(smlslb(z5.VnS(), z4.VnH()), "smlslb z5.d, z4.s, - // <Zm>.s[<imm>]"); - // COMPARE_PREFIX(smlslt(z7.VnD(), z9.VnS()), "smlslt z7.d, z9.s, - // <Zm>.s[<imm>]"); - // COMPARE_PREFIX(smlslt(z9.VnS(), z21.VnH()), "smlslt z9.d, z21.s, - // <Zm>.s[<imm>]"); // COMPARE_PREFIX(smullb(z10.VnD(), z4, z4), "smullb z10.d, z4, z4"); // COMPARE_PREFIX(smullb(z10.VnH(), z4, z4), "smullb z10.h, z4, z4"); // COMPARE_PREFIX(smullb(z10.VnS(), z4, z4), "smullb z10.s, z4, z4"); @@ -8773,22 +8812,6 @@ TEST(sve2_all_instructions) { // <Zm>.d[<imm>]"); // COMPARE_PREFIX(sqrdmulh(z3.VnH(), z29.VnH()), "sqrdmulh z3.d, z29.d, // <Zm>.d[<imm>]"); - // COMPARE_PREFIX(umlalb(z9.VnD(), z1.VnS()), "umlalb z9.d, z1.s, - // <Zm>.s[<imm>]"); - // COMPARE_PREFIX(umlalb(z9.VnS(), z5.VnH()), "umlalb z9.d, z5.s, - // <Zm>.s[<imm>]"); - // COMPARE_PREFIX(umlalt(z6.VnS(), z17.VnH()), "umlalt z6.d, z17.s, - // <Zm>.s[<imm>]"); - // COMPARE_PREFIX(umlalt(z9.VnD(), z11.VnS()), "umlalt z9.d, z11.s, - // <Zm>.s[<imm>]"); - // COMPARE_PREFIX(umlslb(z12.VnD(), z15.VnS()), "umlslb z12.d, z15.s, - // <Zm>.s[<imm>]"); - // COMPARE_PREFIX(umlslb(z14.VnS(), z10.VnH()), "umlslb z14.d, z10.s, - // <Zm>.s[<imm>]"); - // 
COMPARE_PREFIX(umlslt(z12.VnD(), z28.VnS()), "umlslt z12.d, z28.s, - // <Zm>.s[<imm>]"); - // COMPARE_PREFIX(umlslt(z24.VnS(), z12.VnH()), "umlslt z24.d, z12.s, - // <Zm>.s[<imm>]"); // COMPARE_PREFIX(umullb(z12.VnD(), z25, z22), "umullb z12.d, z25, z22"); // COMPARE_PREFIX(umullb(z12.VnH(), z25, z22), "umullb z12.h, z25, z22"); // COMPARE_PREFIX(umullb(z12.VnS(), z25, z22), "umullb z12.s, z25, z22"); diff --git a/test/aarch64/test-simulator-sve2-aarch64.cc b/test/aarch64/test-simulator-sve2-aarch64.cc index d1f302c0..c98d6625 100644 --- a/test/aarch64/test-simulator-sve2-aarch64.cc +++ b/test/aarch64/test-simulator-sve2-aarch64.cc @@ -7534,5 +7534,149 @@ TEST_SVE(sve2_floating_multiply_add_long_vector) { } } +TEST_SVE(sve2_mla_long_index) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x44ea8d67); // smlalt z7.d, z11.s, z10.s[1] + // vl128 state = 0xd08dbe24 + __ dci(0x44ea9d2f); // umlalt z15.d, z9.s, z10.s[1] + // vl128 state = 0x56f6f237 + __ dci(0x44ea9d2d); // umlalt z13.d, z9.s, z10.s[1] + // vl128 state = 0x00f89e4d + __ dci(0x44eb992f); // umlalb z15.d, z9.s, z11.s[1] + // vl128 state = 0xca4e469e + __ dci(0x44ab99ae); // umlalb z14.s, z13.h, z3.h[3] + // vl128 state = 0xd4b18276 + __ dci(0x44ad99be); // umlalb z30.s, z13.h, z5.h[3] + // vl128 state = 0x8650a79e + __ dci(0x44ad99ba); // umlalb z26.s, z13.h, z5.h[3] + // vl128 state = 0x6fa1a501 + __ dci(0x44adb9f2); // umlslb z18.s, z15.h, z5.h[3] + // vl128 state = 0x1a56a5d4 + __ dci(0x44bda9f3); // smlslb z19.s, z15.h, z5.h[7] + // vl128 state = 0xfdb18057 + __ dci(0x44b9a1fb); // smlslb z27.s, z15.h, z1.h[6] + // vl128 state = 0xb46b6c28 + __ dci(0x44b8a1b3); // smlslb z19.s, z13.h, z0.h[6] + // vl128 state = 0x623c62c3 + __ dci(0x44bc81b1); // smlalb z17.s, z13.h, z4.h[6] + // vl128 state = 
0x2abab4d3 + __ dci(0x44bc82b0); // smlalb z16.s, z21.h, z4.h[6] + // vl128 state = 0x7a028731 + __ dci(0x44ac92b8); // umlalb z24.s, z21.h, z4.h[2] + // vl128 state = 0xf48f6936 + __ dci(0x44a4923a); // umlalb z26.s, z17.h, z4.h[0] + // vl128 state = 0xbcdf888d + __ dci(0x44b49a3e); // umlalb z30.s, z17.h, z4.h[5] + // vl128 state = 0x5060778e + __ dci(0x44b69a1c); // umlalb z28.s, z16.h, z6.h[5] + // vl128 state = 0x16da3835 + __ dci(0x44b6b218); // umlslb z24.s, z16.h, z6.h[4] + // vl128 state = 0xac7fb4d0 + __ dci(0x44b2b25a); // umlslb z26.s, z18.h, z2.h[4] + // vl128 state = 0x8d05433b + __ dci(0x44b2ba0a); // umlslb z10.s, z16.h, z2.h[5] + // vl128 state = 0x62630101 + __ dci(0x44b29b08); // umlalb z8.s, z24.h, z2.h[5] + // vl128 state = 0x31ae445b + __ dci(0x44b29b00); // umlalb z0.s, z24.h, z2.h[5] + // vl128 state = 0x539a5875 + __ dci(0x44b29e08); // umlalt z8.s, z16.h, z2.h[5] + // vl128 state = 0x07d4bf73 + __ dci(0x44b29eaa); // umlalt z10.s, z21.h, z2.h[5] + // vl128 state = 0x314f48a8 + __ dci(0x44b2be2e); // umlslt z14.s, z17.h, z2.h[5] + // vl128 state = 0x91bd2c17 + __ dci(0x44b2be3e); // umlslt z30.s, z17.h, z2.h[5] + // vl128 state = 0x4cbf4360 + __ dci(0x44f2be7a); // umlslt z26.d, z19.s, z2.s[3] + // vl128 state = 0xe94e76a9 + __ dci(0x44f2ae4a); // smlslt z10.d, z18.s, z2.s[3] + // vl128 state = 0xd0c2c4cc + __ dci(0x44faae6e); // smlslt z14.d, z19.s, z10.s[3] + // vl128 state = 0xc64d6839 + __ dci(0x44faae6f); // smlslt z15.d, z19.s, z10.s[3] + // vl128 state = 0xa74358aa + __ dci(0x44faae67); // smlslt z7.d, z19.s, z10.s[3] + // vl128 state = 0xb8d9664b + __ dci(0x44fa8e57); // smlalt z23.d, z18.s, z10.s[3] + // vl128 state = 0xf1032ab4 + __ dci(0x44fa8c67); // smlalt z7.d, z3.s, z10.s[3] + // vl128 state = 0x763732f4 + __ dci(0x44eaac66); // smlslt z6.d, z3.s, z10.s[1] + // vl128 state = 0xdcf39367 + __ dci(0x44eaa456); // smlslt z22.d, z2.s, z10.s[0] + // vl128 state = 0x5ea67d82 + __ dci(0x44aea45e); // smlslt z30.s, z2.h, z6.h[2] + // 
vl128 state = 0x55da0908 + __ dci(0x44aaa64e); // smlslt z14.s, z18.h, z2.h[2] + // vl128 state = 0x69d105f5 + __ dci(0x44baa75e); // smlslt z30.s, z26.h, z2.h[6] + // vl128 state = 0x191bc065 + __ dci(0x44baa75a); // smlslt z26.s, z26.h, z2.h[6] + // vl128 state = 0xbf62d2a0 + __ dci(0x44eaa75b); // smlslt z27.d, z26.s, z10.s[0] + // vl128 state = 0x43803a21 + __ dci(0x44eabf5f); // umlslt z31.d, z26.s, z10.s[1] + // vl128 state = 0x0b33725c + __ dci(0x44ebbd57); // umlslt z23.d, z10.s, z11.s[1] + // vl128 state = 0x0059a0f5 + __ dci(0x44abbf55); // umlslt z21.s, z26.h, z3.h[3] + // vl128 state = 0xb587057f + __ dci(0x44abab5d); // smlslb z29.s, z26.h, z3.h[3] + // vl128 state = 0x0bfa30c6 + __ dci(0x44abab5c); // smlslb z28.s, z26.h, z3.h[3] + // vl128 state = 0x151045b4 + __ dci(0x44abaf78); // smlslt z24.s, z27.h, z3.h[3] + // vl128 state = 0xedb7fca9 + __ dci(0x44aaa77c); // smlslt z28.s, z27.h, z2.h[2] + // vl128 state = 0xb68216f9 + __ dci(0x44aaa178); // smlslb z24.s, z11.h, z2.h[2] + // vl128 state = 0x35447b11 + __ dci(0x44aa81fa); // smlalb z26.s, z15.h, z2.h[2] + // vl128 state = 0xf532285f + __ dci(0x44aa8198); // smlalb z24.s, z12.h, z2.h[2] + // vl128 state = 0xd414889b + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0xd414889b, + 0x79d8f659, + 0xe2c8f06b, + 0x91aadf3d, + 0xffb92c3e, + 0xc2d3138e, + 0xdd9f4396, + 0xce39a88e, + 0xfe68a5ca, + 0xdcb072b2, + 0x3756ede6, + 0x5c2eef22, + 0x01fd02a4, + 0xdd8d4890, + 0x87500dc9, + 0x8c895325, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + } // namespace aarch64 } // namespace vixl |