author     Martyn Capewell <martyn.capewell@arm.com>  2020-12-02 17:59:09 +0000
committer  Martyn Capewell <martyn.capewell@arm.com>  2021-01-08 11:19:31 +0000
commit     2e66dc71cadca95042b675dc72c5b6ed00e18562 (patch)
tree       4a5df7aef2935437e9214b16b0be7b1e000b5bd2
parent     ba9a148b62237919c4853db5477ca22540691848 (diff)
[sve2] Implement indexed multiply-accumulate long
Implement indexed multiply-accumulate and subtract instructions for signed and unsigned inputs.

Change-Id: Ifd16fdf1b2cde4aae11343dfa377400f1ab7fc35
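For illustration, the new MacroAssembler forms take the accumulator twice (destination and addend), the two half-width sources and a lane index; the two lines below reuse operand combinations from the disassembly test added by this patch:

    __ Smlalb(z11.VnD(), z11.VnD(), z29.VnS(), z0.VnS(), 3);  // smlalb z11.d, z29.s, z0.s[3]
    __ Umlalt(z9.VnS(), z9.VnS(), z11.VnH(), z3.VnH(), 7);    // umlalt z9.s, z11.h, z3.h[7]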
-rw-r--r--  src/aarch64/assembler-aarch64.h                41
-rw-r--r--  src/aarch64/assembler-sve-aarch64.cc           232
-rw-r--r--  src/aarch64/disasm-aarch64.cc                  32
-rw-r--r--  src/aarch64/instructions-aarch64.cc            25
-rw-r--r--  src/aarch64/instructions-aarch64.h             1
-rw-r--r--  src/aarch64/macro-assembler-aarch64.h          86
-rw-r--r--  src/aarch64/macro-assembler-sve-aarch64.cc     10
-rw-r--r--  src/aarch64/simulator-aarch64.cc               201
-rw-r--r--  src/aarch64/simulator-aarch64.h                1
-rw-r--r--  test/aarch64/test-disasm-sve-aarch64.cc        87
-rw-r--r--  test/aarch64/test-simulator-sve2-aarch64.cc    144
11 files changed, 460 insertions, 400 deletions
diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h
index e665cbfd..43865312 100644
--- a/src/aarch64/assembler-aarch64.h
+++ b/src/aarch64/assembler-aarch64.h
@@ -6150,25 +6150,37 @@ class Assembler : public vixl::internal::AssemblerBase {
const ZRegister& zm);
// Signed multiply-add long to accumulator (bottom, indexed).
- void smlalb(const ZRegister& zda, const ZRegister& zn);
+ void smlalb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
// Signed multiply-add long to accumulator (bottom).
void smlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
// Signed multiply-add long to accumulator (top, indexed).
- void smlalt(const ZRegister& zda, const ZRegister& zn);
+ void smlalt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
// Signed multiply-add long to accumulator (top).
void smlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
// Signed multiply-subtract long from accumulator (bottom, indexed).
- void smlslb(const ZRegister& zda, const ZRegister& zn);
+ void smlslb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
// Signed multiply-subtract long from accumulator (bottom).
void smlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
// Signed multiply-subtract long from accumulator (top, indexed).
- void smlslt(const ZRegister& zda, const ZRegister& zn);
+ void smlslt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
// Signed multiply-subtract long from accumulator (top).
void smlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
@@ -6560,25 +6572,37 @@ class Assembler : public vixl::internal::AssemblerBase {
const ZRegister& zm);
// Unsigned multiply-add long to accumulator (bottom, indexed).
- void umlalb(const ZRegister& zda, const ZRegister& zn);
+ void umlalb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
// Unsigned multiply-add long to accumulator (bottom).
void umlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
// Unsigned multiply-add long to accumulator (top, indexed).
- void umlalt(const ZRegister& zda, const ZRegister& zn);
+ void umlalt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
// Unsigned multiply-add long to accumulator (top).
void umlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
// Unsigned multiply-subtract long from accumulator (bottom, indexed).
- void umlslb(const ZRegister& zda, const ZRegister& zn);
+ void umlslb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
// Unsigned multiply-subtract long from accumulator (bottom).
void umlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
// Unsigned multiply-subtract long from accumulator (top, indexed).
- void umlslt(const ZRegister& zda, const ZRegister& zn);
+ void umlslt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
// Unsigned multiply-subtract long from accumulator (top).
void umlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
@@ -7695,6 +7719,7 @@ class Assembler : public vixl::internal::AssemblerBase {
Instr op_s,
Instr op_d);
+ Instr SVEMulLongIndexHelper(const ZRegister& zm, int index);
void SVEContiguousPrefetchScalarPlusScalarHelper(PrefetchOperation prfop,
const PRegister& pg,
diff --git a/src/aarch64/assembler-sve-aarch64.cc b/src/aarch64/assembler-sve-aarch64.cc
index 9af19463..6d9b7987 100644
--- a/src/aarch64/assembler-sve-aarch64.cc
+++ b/src/aarch64/assembler-sve-aarch64.cc
@@ -1560,15 +1560,15 @@ Instr Assembler::SVEMulIndexHelper(unsigned lane_size_in_bytes_log2,
op = op_h;
break;
case kSRegSizeInBytesLog2:
- VIXL_ASSERT(zm.GetCode() <= 7);
- VIXL_ASSERT(IsUint2(index));
+ VIXL_CHECK(zm.GetCode() <= 7);
+ VIXL_CHECK(IsUint2(index));
// Top two bits of "zm" encode the index.
zm_with_index |= (index & 3) << (Rm_offset + 3);
op = op_s;
break;
case kDRegSizeInBytesLog2:
- VIXL_ASSERT(zm.GetCode() <= 15);
- VIXL_ASSERT(IsUint1(index));
+ VIXL_CHECK(zm.GetCode() <= 15);
+ VIXL_CHECK(IsUint1(index));
// Top bit of "zm" encodes the index.
zm_with_index |= (index & 1) << (Rm_offset + 4);
op = op_d;
@@ -1579,6 +1579,28 @@ Instr Assembler::SVEMulIndexHelper(unsigned lane_size_in_bytes_log2,
return op | zm_with_index | size;
}
+Instr Assembler::SVEMulLongIndexHelper(const ZRegister& zm, int index) {
+ Instr imm_field;
+ Instr zm_id;
+ if (zm.IsLaneSizeH()) {
+ VIXL_CHECK(zm.GetCode() <= 7);
+ VIXL_CHECK(IsUint3(index));
+ imm_field = ExtractUnsignedBitfield32(2, 1, index) << 19;
+ zm_id = Rx<18, 16>(zm);
+ } else {
+ VIXL_ASSERT(zm.IsLaneSizeS());
+ VIXL_CHECK(zm.GetCode() <= 15);
+ VIXL_CHECK(IsUint2(index));
+ imm_field = ExtractBit(index, 1) << 20;
+ zm_id = Rx<19, 16>(zm);
+ }
+
+ // Synthesize the low part of immediate encoding.
+ imm_field |= ExtractBit(index, 0) << 11;
+
+ return zm_id | imm_field;
+}
+
// SVEFPMulAddIndex.
void Assembler::fmla(const ZRegister& zda,
@@ -7746,19 +7768,32 @@ void Assembler::sminp(const ZRegister& zd,
Emit(0x4416a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
}
-// This prototype maps to 2 instruction encodings:
-// smlalb_z_zzzi_d
-// smlalb_z_zzzi_s
-void Assembler::smlalb(const ZRegister& zda, const ZRegister& zn) {
- // SMLALB <Zda>.D, <Zn>.S, <Zm>.S[<imm>]
- // 0100 0100 111. .... 1000 .0.. .... ....
- // size<23:22> | opc<20:16> | S<13> | U<12> | il<11> | T<10> | Zn<9:5> |
- // Zda<4:0>
-
- VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
-
- Emit(0x44e08000 | Rd(zda) | Rn(zn));
-}
+#define VIXL_SVE_MULL_INDEX_LIST(V) \
+ V(smlalb, 0x44a08000) \
+ V(smlalt, 0x44a08400) \
+ V(smlslb, 0x44a0a000) \
+ V(smlslt, 0x44a0a400) \
+ V(umlalb, 0x44a09000) \
+ V(umlalt, 0x44a09400) \
+ V(umlslb, 0x44a0b000) \
+ V(umlslt, 0x44a0b400) \
+ V(sqdmullb, 0x44a0e000) \
+ V(sqdmullt, 0x44a0e400)
+
+#define VIXL_DEFINE_ASM_FUNC(MNE, OP) \
+ void Assembler::MNE(const ZRegister& zda, \
+ const ZRegister& zn, \
+ const ZRegister& zm, \
+ int index) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); \
+ VIXL_ASSERT(AreSameLaneSize(zn, zm)); \
+ VIXL_ASSERT(zda.IsLaneSizeD() || zda.IsLaneSizeS()); \
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2); \
+ Instr zm_with_index = SVEMulLongIndexHelper(zm, index); \
+ Emit(OP | SVESize(zda) | Rd(zda) | Rn(zn) | zm_with_index); \
+ }
+VIXL_SVE_MULL_INDEX_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
void Assembler::smlalb(const ZRegister& zda,
const ZRegister& zn,
@@ -7775,20 +7810,6 @@ void Assembler::smlalb(const ZRegister& zda,
Emit(0x44004000 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
}
-// This prototype maps to 2 instruction encodings:
-// smlalt_z_zzzi_d
-// smlalt_z_zzzi_s
-void Assembler::smlalt(const ZRegister& zda, const ZRegister& zn) {
- // SMLALT <Zda>.D, <Zn>.S, <Zm>.S[<imm>]
- // 0100 0100 111. .... 1000 .1.. .... ....
- // size<23:22> | opc<20:16> | S<13> | U<12> | il<11> | T<10> | Zn<9:5> |
- // Zda<4:0>
-
- VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
-
- Emit(0x44e08400 | Rd(zda) | Rn(zn));
-}
-
void Assembler::smlalt(const ZRegister& zda,
const ZRegister& zn,
const ZRegister& zm) {
@@ -7804,20 +7825,6 @@ void Assembler::smlalt(const ZRegister& zda,
Emit(0x44004400 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
}
-// This prototype maps to 2 instruction encodings:
-// smlslb_z_zzzi_d
-// smlslb_z_zzzi_s
-void Assembler::smlslb(const ZRegister& zda, const ZRegister& zn) {
- // SMLSLB <Zda>.D, <Zn>.S, <Zm>.S[<imm>]
- // 0100 0100 111. .... 1010 .0.. .... ....
- // size<23:22> | opc<20:16> | S<13> | U<12> | il<11> | T<10> | Zn<9:5> |
- // Zda<4:0>
-
- VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
-
- Emit(0x44e0a000 | Rd(zda) | Rn(zn));
-}
-
void Assembler::smlslb(const ZRegister& zda,
const ZRegister& zn,
const ZRegister& zm) {
@@ -7833,20 +7840,6 @@ void Assembler::smlslb(const ZRegister& zda,
Emit(0x44005000 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
}
-// This prototype maps to 2 instruction encodings:
-// smlslt_z_zzzi_d
-// smlslt_z_zzzi_s
-void Assembler::smlslt(const ZRegister& zda, const ZRegister& zn) {
- // SMLSLT <Zda>.D, <Zn>.S, <Zm>.S[<imm>]
- // 0100 0100 111. .... 1010 .1.. .... ....
- // size<23:22> | opc<20:16> | S<13> | U<12> | il<11> | T<10> | Zn<9:5> |
- // Zda<4:0>
-
- VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
-
- Emit(0x44e0a400 | Rd(zda) | Rn(zn));
-}
-
void Assembler::smlslt(const ZRegister& zda,
const ZRegister& zn,
const ZRegister& zm) {
@@ -8229,37 +8222,6 @@ void Assembler::sqdmulh(const ZRegister& zd,
void Assembler::sqdmullb(const ZRegister& zd,
const ZRegister& zn,
- const ZRegister& zm,
- int index) {
- // SQDMULLB <Zd>.D, <Zn>.S, <Zm>.S[<imm>]
- // 0100 0100 111. .... 1110 .0.. .... ....
- // size<23:22> | opc<20:16> | il<11> | T<10> | Zn<9:5> | Zd<4:0>
-
- VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
- VIXL_ASSERT(AreSameLaneSize(zn, zm));
- VIXL_ASSERT(zd.IsLaneSizeD() || zd.IsLaneSizeS());
-
- Instr imm_field;
- Instr zm_id;
- if (zd.IsLaneSizeS()) {
- VIXL_ASSERT(IsUint7(index));
- imm_field = ExtractUnsignedBitfield32(2, 1, index) << 19;
- zm_id = Rx<18, 16>(zm);
- } else {
- VIXL_ASSERT(zd.IsLaneSizeD());
- VIXL_ASSERT(IsUint3(index));
- imm_field = ExtractBit(index, 1) << 20;
- zm_id = Rx<19, 16>(zm);
- }
-
- // Synthesize the low part of immediate encoding.
- imm_field |= ExtractBit(index, 0) << 11;
-
- Emit(0x44a0e000 | SVESize(zd) | Rd(zd) | Rn(zn) | zm_id | imm_field);
-}
-
-void Assembler::sqdmullb(const ZRegister& zd,
- const ZRegister& zn,
const ZRegister& zm) {
// SQDMULLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
// 0100 0101 ..0. .... 0110 00.. .... ....
@@ -8273,40 +8235,6 @@ void Assembler::sqdmullb(const ZRegister& zd,
Emit(0x45006000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
}
-// This prototype maps to 2 instruction encodings:
-// sqdmullt_z_zzi_d
-// sqdmullt_z_zzi_s
-void Assembler::sqdmullt(const ZRegister& zd,
- const ZRegister& zn,
- const ZRegister& zm,
- int index) {
- // SQDMULLT <Zd>.D, <Zn>.S, <Zm>.S[<imm>]
- // 0100 0100 111. .... 1110 .1.. .... ....
- // size<23:22> | opc<20:16> | il<11> | T<10> | Zn<9:5> | Zd<4:0>
-
- VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
- VIXL_ASSERT(AreSameLaneSize(zn, zm));
- VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2);
-
- Instr imm_field;
- Instr zm_id;
- if (zd.IsLaneSizeS()) {
- VIXL_ASSERT(IsUint7(index));
- imm_field = ExtractUnsignedBitfield32(2, 1, index) << 19;
- zm_id = Rx<18, 16>(zm);
- } else {
- VIXL_ASSERT(zd.IsLaneSizeD());
- VIXL_ASSERT(IsUint3(index));
- imm_field = ExtractBit(index, 1) << 20;
- zm_id = Rx<19, 16>(zm);
- }
-
- // Synthesize the low part of immediate encoding.
- imm_field |= ExtractBit(index, 0) << 11;
-
- Emit(0x44a0e400 | SVESize(zd) | Rd(zd) | Rn(zn) | zm_id | imm_field);
-}
-
void Assembler::sqdmullt(const ZRegister& zd,
const ZRegister& zn,
const ZRegister& zm) {
@@ -9241,20 +9169,6 @@ void Assembler::uminp(const ZRegister& zd,
Emit(0x4417a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
}
-// This prototype maps to 2 instruction encodings:
-// umlalb_z_zzzi_d
-// umlalb_z_zzzi_s
-void Assembler::umlalb(const ZRegister& zda, const ZRegister& zn) {
- // UMLALB <Zda>.D, <Zn>.S, <Zm>.S[<imm>]
- // 0100 0100 111. .... 1001 .0.. .... ....
- // size<23:22> | opc<20:16> | S<13> | U<12> | il<11> | T<10> | Zn<9:5> |
- // Zda<4:0>
-
- VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
-
- Emit(0x44e09000 | Rd(zda) | Rn(zn));
-}
-
void Assembler::umlalb(const ZRegister& zda,
const ZRegister& zn,
const ZRegister& zm) {
@@ -9270,20 +9184,6 @@ void Assembler::umlalb(const ZRegister& zda,
Emit(0x44004800 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
}
-// This prototype maps to 2 instruction encodings:
-// umlalt_z_zzzi_d
-// umlalt_z_zzzi_s
-void Assembler::umlalt(const ZRegister& zda, const ZRegister& zn) {
- // UMLALT <Zda>.D, <Zn>.S, <Zm>.S[<imm>]
- // 0100 0100 111. .... 1001 .1.. .... ....
- // size<23:22> | opc<20:16> | S<13> | U<12> | il<11> | T<10> | Zn<9:5> |
- // Zda<4:0>
-
- VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
-
- Emit(0x44e09400 | Rd(zda) | Rn(zn));
-}
-
void Assembler::umlalt(const ZRegister& zda,
const ZRegister& zn,
const ZRegister& zm) {
@@ -9299,20 +9199,6 @@ void Assembler::umlalt(const ZRegister& zda,
Emit(0x44004c00 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
}
-// This prototype maps to 2 instruction encodings:
-// umlslb_z_zzzi_d
-// umlslb_z_zzzi_s
-void Assembler::umlslb(const ZRegister& zda, const ZRegister& zn) {
- // UMLSLB <Zda>.D, <Zn>.S, <Zm>.S[<imm>]
- // 0100 0100 111. .... 1011 .0.. .... ....
- // size<23:22> | opc<20:16> | S<13> | U<12> | il<11> | T<10> | Zn<9:5> |
- // Zda<4:0>
-
- VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
-
- Emit(0x44e0b000 | Rd(zda) | Rn(zn));
-}
-
void Assembler::umlslb(const ZRegister& zda,
const ZRegister& zn,
const ZRegister& zm) {
@@ -9328,20 +9214,6 @@ void Assembler::umlslb(const ZRegister& zda,
Emit(0x44005800 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
}
-// This prototype maps to 2 instruction encodings:
-// umlslt_z_zzzi_d
-// umlslt_z_zzzi_s
-void Assembler::umlslt(const ZRegister& zda, const ZRegister& zn) {
- // UMLSLT <Zda>.D, <Zn>.S, <Zm>.S[<imm>]
- // 0100 0100 111. .... 1011 .1.. .... ....
- // size<23:22> | opc<20:16> | S<13> | U<12> | il<11> | T<10> | Zn<9:5> |
- // Zda<4:0>
-
- VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
-
- Emit(0x44e0b400 | Rd(zda) | Rn(zn));
-}
-
void Assembler::umlslt(const ZRegister& zda,
const ZRegister& zn,
const ZRegister& zm) {
diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc
index 3ba3aafc..b3f38997 100644
--- a/src/aarch64/disasm-aarch64.cc
+++ b/src/aarch64/disasm-aarch64.cc
@@ -145,17 +145,17 @@ Disassembler::FormToVisitorFnMap Disassembler::form_to_visitor_ = {
{"smaxp_z_p_zz", &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
{"sminp_z_p_zz", &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
{"smlalb_z_zzz", &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb},
- {"smlalb_z_zzzi_d", &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm},
- {"smlalb_z_zzzi_s", &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm},
+ {"smlalb_z_zzzi_d", &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm},
+ {"smlalb_z_zzzi_s", &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm},
{"smlalt_z_zzz", &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb},
- {"smlalt_z_zzzi_d", &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm},
- {"smlalt_z_zzzi_s", &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm},
+ {"smlalt_z_zzzi_d", &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm},
+ {"smlalt_z_zzzi_s", &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm},
{"smlslb_z_zzz", &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb},
- {"smlslb_z_zzzi_d", &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm},
- {"smlslb_z_zzzi_s", &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm},
+ {"smlslb_z_zzzi_d", &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm},
+ {"smlslb_z_zzzi_s", &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm},
{"smlslt_z_zzz", &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb},
- {"smlslt_z_zzzi_d", &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm},
- {"smlslt_z_zzzi_s", &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm},
+ {"smlslt_z_zzzi_d", &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm},
+ {"smlslt_z_zzzi_s", &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm},
{"smulh_z_zz", &Disassembler::Disassemble_ZdT_ZnT_ZmT},
{"smullb_z_zz", &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
{"smullb_z_zzi_d", &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm},
@@ -270,17 +270,17 @@ Disassembler::FormToVisitorFnMap Disassembler::form_to_visitor_ = {
{"umaxp_z_p_zz", &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
{"uminp_z_p_zz", &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
{"umlalb_z_zzz", &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb},
- {"umlalb_z_zzzi_d", &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm},
- {"umlalb_z_zzzi_s", &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm},
+ {"umlalb_z_zzzi_d", &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm},
+ {"umlalb_z_zzzi_s", &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm},
{"umlalt_z_zzz", &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb},
- {"umlalt_z_zzzi_d", &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm},
- {"umlalt_z_zzzi_s", &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm},
+ {"umlalt_z_zzzi_d", &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm},
+ {"umlalt_z_zzzi_s", &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm},
{"umlslb_z_zzz", &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb},
- {"umlslb_z_zzzi_d", &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm},
- {"umlslb_z_zzzi_s", &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm},
+ {"umlslb_z_zzzi_d", &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm},
+ {"umlslb_z_zzzi_s", &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm},
{"umlslt_z_zzz", &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb},
- {"umlslt_z_zzzi_d", &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm},
- {"umlslt_z_zzzi_s", &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm},
+ {"umlslt_z_zzzi_d", &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm},
+ {"umlslt_z_zzzi_s", &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm},
{"umulh_z_zz", &Disassembler::Disassemble_ZdT_ZnT_ZmT},
{"umullb_z_zz", &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
{"umullb_z_zzi_d", &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm},
diff --git a/src/aarch64/instructions-aarch64.cc b/src/aarch64/instructions-aarch64.cc
index b918cf3f..0f134600 100644
--- a/src/aarch64/instructions-aarch64.cc
+++ b/src/aarch64/instructions-aarch64.cc
@@ -638,6 +638,31 @@ std::pair<int, int> Instruction::GetSVEMulZmAndIndex() const {
return std::make_pair(reg_code, index);
}
+// Get the register and index for SVE indexed long multiplies encoded in the
+// forms:
+// .h : Zm = <18:16>, index = <20:19><11>
+// .s : Zm = <19:16>, index = <20><11>
+std::pair<int, int> Instruction::GetSVEMulLongZmAndIndex() const {
+ int reg_code = GetRmLow16();
+ int index = ExtractBit(11);
+
+ // For long multiplies, the SVE size field <23:22> encodes the destination
+ // element size. The source element size is half the width.
+ switch (GetSVEVectorFormat()) {
+ case kFormatVnS:
+ reg_code &= 7;
+ index |= ExtractBits(20, 19) << 1;
+ break;
+ case kFormatVnD:
+ index |= ExtractBit(20) << 1;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ return std::make_pair(reg_code, index);
+}
+
// Logical immediates can't encode zero, so a return value of zero is used to
// indicate a failure case. Specifically, where the constraints on imm_s are
// not met.
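The comment above gives the ".h" operand layout: Zm in bits <18:16>, with the 3-bit index split across bits <20:19> and <11>. A minimal standalone sketch of that packing, using hypothetical function names and no dependence on VIXL internals:

    #include <cassert>
    #include <cstdint>
    #include <utility>

    // ".h" form: Zm (0-7) lives in bits <18:16>; index<2:1> goes to bits
    // <20:19> and index<0> to bit <11>.
    uint32_t EncodeMulLongHOperand(uint32_t zm, uint32_t index) {
      assert((zm <= 7) && (index <= 7));
      return (zm << 16) | (((index >> 1) & 3) << 19) | ((index & 1) << 11);
    }

    // Recover the (Zm, index) pair, mirroring the ".h" case above.
    std::pair<uint32_t, uint32_t> DecodeMulLongHOperand(uint32_t instr) {
      uint32_t zm = (instr >> 16) & 7;
      uint32_t index = (((instr >> 19) & 3) << 1) | ((instr >> 11) & 1);
      return std::make_pair(zm, index);
    }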
diff --git a/src/aarch64/instructions-aarch64.h b/src/aarch64/instructions-aarch64.h
index 318eac54..3ef94a4c 100644
--- a/src/aarch64/instructions-aarch64.h
+++ b/src/aarch64/instructions-aarch64.h
@@ -350,6 +350,7 @@ class Instruction {
std::pair<int, int> GetSVEPermuteIndexAndLaneSizeLog2() const;
std::pair<int, int> GetSVEMulZmAndIndex() const;
+ std::pair<int, int> GetSVEMulLongZmAndIndex() const;
std::pair<int, int> GetSVEImmShiftAndLaneSizeLog2(bool is_predicated) const;
diff --git a/src/aarch64/macro-assembler-aarch64.h b/src/aarch64/macro-assembler-aarch64.h
index 7e088eeb..be89dbac 100644
--- a/src/aarch64/macro-assembler-aarch64.h
+++ b/src/aarch64/macro-assembler-aarch64.h
@@ -6760,38 +6760,38 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
const PRegisterM& pg,
const ZRegister& zn,
const ZRegister& zm);
- void Smlalb(const ZRegister& zda, const ZRegister& zn) {
- VIXL_ASSERT(allow_macro_instructions_);
- SingleEmissionCheckScope guard(this);
- smlalb(zda, zn);
- }
+ void Smlalb(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
void Smlalb(const ZRegister& zd,
const ZRegister& za,
const ZRegister& zn,
const ZRegister& zm);
- void Smlalt(const ZRegister& zda, const ZRegister& zn) {
- VIXL_ASSERT(allow_macro_instructions_);
- SingleEmissionCheckScope guard(this);
- smlalt(zda, zn);
- }
+ void Smlalt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
void Smlalt(const ZRegister& zd,
const ZRegister& za,
const ZRegister& zn,
const ZRegister& zm);
- void Smlslb(const ZRegister& zda, const ZRegister& zn) {
- VIXL_ASSERT(allow_macro_instructions_);
- SingleEmissionCheckScope guard(this);
- smlslb(zda, zn);
- }
+ void Smlslb(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
void Smlslb(const ZRegister& zd,
const ZRegister& za,
const ZRegister& zn,
const ZRegister& zm);
- void Smlslt(const ZRegister& zda, const ZRegister& zn) {
- VIXL_ASSERT(allow_macro_instructions_);
- SingleEmissionCheckScope guard(this);
- smlslt(zda, zn);
- }
+ void Smlslt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
void Smlslt(const ZRegister& zd,
const ZRegister& za,
const ZRegister& zn,
@@ -7199,39 +7199,39 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
const PRegisterM& pg,
const ZRegister& zn,
const ZRegister& zm);
- void Umlalb(const ZRegister& zda, const ZRegister& zn) {
- VIXL_ASSERT(allow_macro_instructions_);
- SingleEmissionCheckScope guard(this);
- umlalb(zda, zn);
- }
+ void Umlalb(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
void Umlalb(const ZRegister& zd,
const ZRegister& za,
const ZRegister& zn,
const ZRegister& zm);
- void Umlalt(const ZRegister& zda, const ZRegister& zn) {
- VIXL_ASSERT(allow_macro_instructions_);
- SingleEmissionCheckScope guard(this);
- umlalt(zda, zn);
- }
- void Umlalt(const ZRegister& zda,
+ void Umlalt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Umlalt(const ZRegister& zd,
const ZRegister& za,
const ZRegister& zn,
const ZRegister& zm);
- void Umlslb(const ZRegister& zda, const ZRegister& zn) {
- VIXL_ASSERT(allow_macro_instructions_);
- SingleEmissionCheckScope guard(this);
- umlslb(zda, zn);
- }
- void Umlslb(const ZRegister& zda,
+ void Umlslb(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Umlslb(const ZRegister& zd,
const ZRegister& za,
const ZRegister& zn,
const ZRegister& zm);
- void Umlslt(const ZRegister& zda, const ZRegister& zn) {
- VIXL_ASSERT(allow_macro_instructions_);
- SingleEmissionCheckScope guard(this);
- umlslt(zda, zn);
- }
- void Umlslt(const ZRegister& zda,
+ void Umlslt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Umlslt(const ZRegister& zd,
const ZRegister& za,
const ZRegister& zn,
const ZRegister& zm);
diff --git a/src/aarch64/macro-assembler-sve-aarch64.cc b/src/aarch64/macro-assembler-sve-aarch64.cc
index 171e5809..b03b8fae 100644
--- a/src/aarch64/macro-assembler-sve-aarch64.cc
+++ b/src/aarch64/macro-assembler-sve-aarch64.cc
@@ -1862,7 +1862,15 @@ VIXL_SVE_4REG_LIST(VIXL_DEFINE_MASM_FUNC)
V(Sqdmlalb, sqdmlalb, FourRegOneImmDestructiveHelper) \
V(Sqdmlalt, sqdmlalt, FourRegOneImmDestructiveHelper) \
V(Sqdmlslb, sqdmlslb, FourRegOneImmDestructiveHelper) \
- V(Sqdmlslt, sqdmlslt, FourRegOneImmDestructiveHelper)
+ V(Sqdmlslt, sqdmlslt, FourRegOneImmDestructiveHelper) \
+ V(Smlalb, smlalb, FourRegOneImmDestructiveHelper) \
+ V(Smlalt, smlalt, FourRegOneImmDestructiveHelper) \
+ V(Smlslb, smlslb, FourRegOneImmDestructiveHelper) \
+ V(Smlslt, smlslt, FourRegOneImmDestructiveHelper) \
+ V(Umlalb, umlalb, FourRegOneImmDestructiveHelper) \
+ V(Umlalt, umlalt, FourRegOneImmDestructiveHelper) \
+ V(Umlslb, umlslb, FourRegOneImmDestructiveHelper) \
+ V(Umlslt, umlslt, FourRegOneImmDestructiveHelper)
#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN, HELPER) \
void MacroAssembler::MASMFN(const ZRegister& zd, \
diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc
index 0f697084..c1753692 100644
--- a/src/aarch64/simulator-aarch64.cc
+++ b/src/aarch64/simulator-aarch64.cc
@@ -170,24 +170,24 @@ Simulator::FormToVisitorFnMap Simulator::form_to_visitor_ = {
{"smaxp_z_p_zz", &Simulator::SimulateSVEIntArithPair},
{"sminp_z_p_zz", &Simulator::SimulateSVEIntArithPair},
{"smlalb_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
- {"smlalb_z_zzzi_d", &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
- {"smlalb_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
+ {"smlalb_z_zzzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"smlalb_z_zzzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
{"smlalt_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
- {"smlalt_z_zzzi_d", &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
- {"smlalt_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
+ {"smlalt_z_zzzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"smlalt_z_zzzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
{"smlslb_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
- {"smlslb_z_zzzi_d", &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
- {"smlslb_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
+ {"smlslb_z_zzzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"smlslb_z_zzzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
{"smlslt_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
- {"smlslt_z_zzzi_d", &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
- {"smlslt_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
+ {"smlslt_z_zzzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"smlslt_z_zzzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
{"smulh_z_zz", &Simulator::Simulate_ZdT_ZnT_ZmT},
{"smullb_z_zz", &Simulator::SimulateSVEIntMulLongVec},
{"smullb_z_zzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
- {"smullb_z_zzi_s", &Simulator::Simulate_ZdS_ZnH_ZmH_imm},
+ {"smullb_z_zzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
{"smullt_z_zz", &Simulator::SimulateSVEIntMulLongVec},
{"smullt_z_zzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
- {"smullt_z_zzi_s", &Simulator::Simulate_ZdS_ZnH_ZmH_imm},
+ {"smullt_z_zzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
{"splice_z_p_zz_con", &Simulator::VisitSVEVectorSplice},
{"sqabs_z_p_z", &Simulator::Simulate_ZdT_PgM_ZnT},
{"sqadd_z_p_zz", &Simulator::SimulateSVESaturatingArithmetic},
@@ -212,10 +212,10 @@ Simulator::FormToVisitorFnMap Simulator::form_to_visitor_ = {
{"sqdmulh_z_zzi_s", &Simulator::Simulate_ZdS_ZnS_ZmS_imm},
{"sqdmullb_z_zz", &Simulator::SimulateSVEIntMulLongVec},
{"sqdmullb_z_zzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
- {"sqdmullb_z_zzi_s", &Simulator::Simulate_ZdS_ZnH_ZmH_imm},
+ {"sqdmullb_z_zzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
{"sqdmullt_z_zz", &Simulator::SimulateSVEIntMulLongVec},
{"sqdmullt_z_zzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
- {"sqdmullt_z_zzi_s", &Simulator::Simulate_ZdS_ZnH_ZmH_imm},
+ {"sqdmullt_z_zzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
{"sqneg_z_p_z", &Simulator::Simulate_ZdT_PgM_ZnT},
{"sqrdcmlah_z_zzz", &Simulator::SimulateSVEComplexIntMulAdd},
{"sqrdcmlah_z_zzzi_h", &Simulator::SimulateSVEComplexIntMulAdd},
@@ -295,24 +295,24 @@ Simulator::FormToVisitorFnMap Simulator::form_to_visitor_ = {
{"umaxp_z_p_zz", &Simulator::SimulateSVEIntArithPair},
{"uminp_z_p_zz", &Simulator::SimulateSVEIntArithPair},
{"umlalb_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
- {"umlalb_z_zzzi_d", &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
- {"umlalb_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
+ {"umlalb_z_zzzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"umlalb_z_zzzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
{"umlalt_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
- {"umlalt_z_zzzi_d", &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
- {"umlalt_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
+ {"umlalt_z_zzzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"umlalt_z_zzzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
{"umlslb_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
- {"umlslb_z_zzzi_d", &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
- {"umlslb_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
+ {"umlslb_z_zzzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"umlslb_z_zzzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
{"umlslt_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
- {"umlslt_z_zzzi_d", &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
- {"umlslt_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
+ {"umlslt_z_zzzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"umlslt_z_zzzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
{"umulh_z_zz", &Simulator::Simulate_ZdT_ZnT_ZmT},
{"umullb_z_zz", &Simulator::SimulateSVEIntMulLongVec},
{"umullb_z_zzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
- {"umullb_z_zzi_s", &Simulator::Simulate_ZdS_ZnH_ZmH_imm},
+ {"umullb_z_zzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
{"umullt_z_zz", &Simulator::SimulateSVEIntMulLongVec},
{"umullt_z_zzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
- {"umullt_z_zzi_s", &Simulator::Simulate_ZdS_ZnH_ZmH_imm},
+ {"umullt_z_zzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
{"uqadd_z_p_zz", &Simulator::SimulateSVESaturatingArithmetic},
{"uqrshl_z_p_zz", &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
{"uqrshlr_z_p_zz", &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
@@ -2064,20 +2064,21 @@ void Simulator::Simulate_ZdD_ZnD_ZmD_imm(const Instruction* instr) {
}
void Simulator::SimulateSVESaturatingIntMulLongIdx(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
SimVRegister& zd = ReadVRegister(instr->GetRd());
- SimVRegister& zm = ReadVRegister(instr->ExtractBits(19, 16));
SimVRegister& zn = ReadVRegister(instr->GetRn());
SimVRegister temp, zm_idx, zn_b, zn_t;
// Instead of calling the indexed form of the instruction logic, we call the
- // vector form, which can reuse existing function logics without modification.
+ // vector form, which can reuse existing function logic without modification.
  // Select the specified elements based on the index input and then pack them
// to the corresponding position.
- Instr index = (instr->ExtractBit(20) << 1) | instr->ExtractBit(11);
- dup_elements_to_segments(kFormatVnS, temp, zm, index);
- pack_even_elements(kFormatVnS, zm_idx, temp);
- pack_even_elements(kFormatVnS, zn_b, zn);
- pack_odd_elements(kFormatVnS, zn_t, zn);
+ VectorFormat vform_half = VectorFormatHalfWidth(vform);
+ dup_elements_to_segments(vform_half, temp, instr->GetSVEMulLongZmAndIndex());
+ pack_even_elements(vform_half, zm_idx, temp);
+
+ pack_even_elements(vform_half, zn_b, zn);
+ pack_odd_elements(vform_half, zn_t, zn);
switch (form_hash_) {
case Hash("smullb_z_zzi_d"):
@@ -2087,10 +2088,10 @@ void Simulator::SimulateSVESaturatingIntMulLongIdx(const Instruction* instr) {
VIXL_UNIMPLEMENTED();
break;
case Hash("sqdmullb_z_zzi_d"):
- sqdmull(kFormatVnD, zd, zn_b, zm_idx);
+ sqdmull(vform, zd, zn_b, zm_idx);
break;
case Hash("sqdmullt_z_zzi_d"):
- sqdmull(kFormatVnD, zd, zn_t, zm_idx);
+ sqdmull(vform, zd, zn_t, zm_idx);
break;
case Hash("umullb_z_zzi_d"):
VIXL_UNIMPLEMENTED();
@@ -2098,6 +2099,56 @@ void Simulator::SimulateSVESaturatingIntMulLongIdx(const Instruction* instr) {
case Hash("umullt_z_zzi_d"):
VIXL_UNIMPLEMENTED();
break;
+ case Hash("smullb_z_zzi_s"):
+ VIXL_UNIMPLEMENTED();
+ break;
+ case Hash("smullt_z_zzi_s"):
+ VIXL_UNIMPLEMENTED();
+ break;
+ case Hash("sqdmullb_z_zzi_s"):
+ sqdmull(vform, zd, zn_b, zm_idx);
+ break;
+ case Hash("sqdmullt_z_zzi_s"):
+ sqdmull(vform, zd, zn_t, zm_idx);
+ break;
+ case Hash("umullb_z_zzi_s"):
+ VIXL_UNIMPLEMENTED();
+ break;
+ case Hash("umullt_z_zzi_s"):
+ VIXL_UNIMPLEMENTED();
+ break;
+ case Hash("smlalb_z_zzzi_s"):
+ case Hash("smlalb_z_zzzi_d"):
+ smlal(vform, zd, zn_b, zm_idx);
+ break;
+ case Hash("smlalt_z_zzzi_s"):
+ case Hash("smlalt_z_zzzi_d"):
+ smlal(vform, zd, zn_t, zm_idx);
+ break;
+ case Hash("smlslb_z_zzzi_s"):
+ case Hash("smlslb_z_zzzi_d"):
+ smlsl(vform, zd, zn_b, zm_idx);
+ break;
+ case Hash("smlslt_z_zzzi_s"):
+ case Hash("smlslt_z_zzzi_d"):
+ smlsl(vform, zd, zn_t, zm_idx);
+ break;
+ case Hash("umlalb_z_zzzi_s"):
+ case Hash("umlalb_z_zzzi_d"):
+ umlal(vform, zd, zn_b, zm_idx);
+ break;
+ case Hash("umlalt_z_zzzi_s"):
+ case Hash("umlalt_z_zzzi_d"):
+ umlal(vform, zd, zn_t, zm_idx);
+ break;
+ case Hash("umlslb_z_zzzi_s"):
+ case Hash("umlslb_z_zzzi_d"):
+ umlsl(vform, zd, zn_b, zm_idx);
+ break;
+ case Hash("umlslt_z_zzzi_s"):
+ case Hash("umlslt_z_zzzi_d"):
+ umlsl(vform, zd, zn_t, zm_idx);
+ break;
default:
VIXL_UNIMPLEMENTED();
}
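The handler reuses the vector-form multiply logic: the indexed zm element is broadcast within each 128-bit segment, and the even (bottom) or odd (top) zn elements are packed before the widening operation. A hypothetical scalar reference for the .s-destination smlalb case, assuming 128-bit segments of eight halfword lanes:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Scalar model of SMLALB (indexed, .s destination): each 32-bit
    // accumulator lane adds the product of the even (bottom) 16-bit zn
    // element and the zm element selected by 'index' within the same
    // 128-bit segment.
    void SmlalbIndexedS(std::vector<int32_t>* zda,       // n S lanes
                        const std::vector<int16_t>& zn,  // 2n H lanes
                        const std::vector<int16_t>& zm,  // 2n H lanes
                        int index) {                     // 0..7
      const size_t kHLanesPerSegment = 8;  // 128 bits of 16-bit lanes
      for (size_t d = 0; d < zda->size(); d++) {
        size_t bottom = 2 * d;  // even-numbered H lane feeding this S lane
        size_t segment_base = (bottom / kHLanesPerSegment) * kHLanesPerSegment;
        int32_t product =
            static_cast<int32_t>(zn[bottom]) * zm[segment_base + index];
        (*zda)[d] += product;
      }
    }

The top (smlalt) variant differs only in using the odd lanes, and the .d-destination forms work on segments of four word lanes with a 2-bit index.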
@@ -2205,46 +2256,6 @@ void Simulator::Simulate_ZdS_PgM_ZnS(const Instruction* instr) {
mov_merging(vform, zd, pg, result);
}
-void Simulator::Simulate_ZdS_ZnH_ZmH_imm(const Instruction* instr) {
- SimVRegister& zd = ReadVRegister(instr->GetRd());
- SimVRegister& zm = ReadVRegister(instr->ExtractBits(18, 16));
- SimVRegister& zn = ReadVRegister(instr->GetRn());
-
- SimVRegister temp, zm_idx, zn_b, zn_t;
- // Instead of calling the indexed form of the instruction logic, we call the
- // vector form, which can reuse existing function logics without modification.
- // Select the specified elements based on the index input and than pack them
- // to the corresponding position.
- Instr index = (instr->ExtractBits(20, 19) << 1) | instr->ExtractBit(11);
- dup_elements_to_segments(kFormatVnH, temp, zm, index);
- pack_even_elements(kFormatVnH, zm_idx, temp);
- pack_even_elements(kFormatVnH, zn_b, zn);
- pack_odd_elements(kFormatVnH, zn_t, zn);
-
- switch (form_hash_) {
- case Hash("smullb_z_zzi_s"):
- VIXL_UNIMPLEMENTED();
- break;
- case Hash("smullt_z_zzi_s"):
- VIXL_UNIMPLEMENTED();
- break;
- case Hash("sqdmullb_z_zzi_s"):
- sqdmull(kFormatVnS, zd, zn_b, zm_idx);
- break;
- case Hash("sqdmullt_z_zzi_s"):
- sqdmull(kFormatVnS, zd, zn_t, zm_idx);
- break;
- case Hash("umullb_z_zzi_s"):
- VIXL_UNIMPLEMENTED();
- break;
- case Hash("umullt_z_zzi_s"):
- VIXL_UNIMPLEMENTED();
- break;
- default:
- VIXL_UNIMPLEMENTED();
- }
-}
-
void Simulator::Simulate_ZdS_ZnS_ZmS_imm(const Instruction* instr) {
SimVRegister& zd = ReadVRegister(instr->GetRd());
USE(zd);
@@ -2776,18 +2787,6 @@ void Simulator::Simulate_ZdaD_ZnS_ZmS_imm(const Instruction* instr) {
pack_odd_elements(kFormatVnS, zn_t, zn);
switch (form_hash_) {
- case Hash("smlalb_z_zzzi_d"):
- VIXL_UNIMPLEMENTED();
- break;
- case Hash("smlalt_z_zzzi_d"):
- VIXL_UNIMPLEMENTED();
- break;
- case Hash("smlslb_z_zzzi_d"):
- VIXL_UNIMPLEMENTED();
- break;
- case Hash("smlslt_z_zzzi_d"):
- VIXL_UNIMPLEMENTED();
- break;
case Hash("sqdmlalb_z_zzzi_d"):
sqdmlal(kFormatVnD, zda, zn_b, zm_idx);
break;
@@ -2800,18 +2799,6 @@ void Simulator::Simulate_ZdaD_ZnS_ZmS_imm(const Instruction* instr) {
case Hash("sqdmlslt_z_zzzi_d"):
sqdmlsl(kFormatVnD, zda, zn_t, zm_idx);
break;
- case Hash("umlalb_z_zzzi_d"):
- VIXL_UNIMPLEMENTED();
- break;
- case Hash("umlalt_z_zzzi_d"):
- VIXL_UNIMPLEMENTED();
- break;
- case Hash("umlslb_z_zzzi_d"):
- VIXL_UNIMPLEMENTED();
- break;
- case Hash("umlslt_z_zzzi_d"):
- VIXL_UNIMPLEMENTED();
- break;
default:
VIXL_UNIMPLEMENTED();
}
@@ -2895,18 +2882,6 @@ void Simulator::Simulate_ZdaS_ZnH_ZmH_imm(const Instruction* instr) {
case Hash("fmlslt_z_zzzi_s"):
VIXL_UNIMPLEMENTED();
break;
- case Hash("smlalb_z_zzzi_s"):
- VIXL_UNIMPLEMENTED();
- break;
- case Hash("smlalt_z_zzzi_s"):
- VIXL_UNIMPLEMENTED();
- break;
- case Hash("smlslb_z_zzzi_s"):
- VIXL_UNIMPLEMENTED();
- break;
- case Hash("smlslt_z_zzzi_s"):
- VIXL_UNIMPLEMENTED();
- break;
case Hash("sqdmlalb_z_zzzi_s"):
sqdmlal(kFormatVnS, zda, zn_b, zm_idx);
break;
@@ -2919,18 +2894,6 @@ void Simulator::Simulate_ZdaS_ZnH_ZmH_imm(const Instruction* instr) {
case Hash("sqdmlslt_z_zzzi_s"):
sqdmlsl(kFormatVnS, zda, zn_t, zm_idx);
break;
- case Hash("umlalb_z_zzzi_s"):
- VIXL_UNIMPLEMENTED();
- break;
- case Hash("umlalt_z_zzzi_s"):
- VIXL_UNIMPLEMENTED();
- break;
- case Hash("umlslb_z_zzzi_s"):
- VIXL_UNIMPLEMENTED();
- break;
- case Hash("umlslt_z_zzzi_s"):
- VIXL_UNIMPLEMENTED();
- break;
default:
VIXL_UNIMPLEMENTED();
}
diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h
index 3f5ee914..bac4863d 100644
--- a/src/aarch64/simulator-aarch64.h
+++ b/src/aarch64/simulator-aarch64.h
@@ -1178,7 +1178,6 @@ class Simulator : public DecoderVisitor {
void Simulate_ZdS_PgM_ZnD(const Instruction* instr);
void Simulate_ZdS_PgM_ZnH(const Instruction* instr);
void Simulate_ZdS_PgM_ZnS(const Instruction* instr);
- void Simulate_ZdS_ZnH_ZmH_imm(const Instruction* instr);
void Simulate_ZdS_ZnS_ZmS_imm(const Instruction* instr);
void Simulate_ZdT_PgM_ZnT(const Instruction* instr);
void Simulate_ZdT_PgZ_ZnT_ZmT(const Instruction* instr);
diff --git a/test/aarch64/test-disasm-sve-aarch64.cc b/test/aarch64/test-disasm-sve-aarch64.cc
index 134ec52f..bc683624 100644
--- a/test/aarch64/test-disasm-sve-aarch64.cc
+++ b/test/aarch64/test-disasm-sve-aarch64.cc
@@ -8611,6 +8611,61 @@ TEST(sve2_floating_multiply_add_long_vector) {
CLEANUP();
}
+TEST(sve2_mla_long_index) {
+ SETUP();
+
+ COMPARE_MACRO(Smlalb(z11.VnD(), z11.VnD(), z29.VnS(), z0.VnS(), 3),
+ "smlalb z11.d, z29.s, z0.s[3]");
+ COMPARE_MACRO(Smlalb(z18.VnS(), z18.VnS(), z17.VnH(), z0.VnH(), 7),
+ "smlalb z18.s, z17.h, z0.h[7]");
+ COMPARE_MACRO(Smlalt(z10.VnD(), z10.VnD(), z30.VnS(), z15.VnS(), 0),
+ "smlalt z10.d, z30.s, z15.s[0]");
+ COMPARE_MACRO(Smlalt(z23.VnS(), z23.VnS(), z31.VnH(), z7.VnH(), 0),
+ "smlalt z23.s, z31.h, z7.h[0]");
+ COMPARE_MACRO(Smlslb(z12.VnD(), z12.VnD(), z23.VnS(), z3.VnS(), 1),
+ "smlslb z12.d, z23.s, z3.s[1]");
+ COMPARE_MACRO(Smlslb(z5.VnS(), z5.VnS(), z4.VnH(), z4.VnH(), 2),
+ "smlslb z5.s, z4.h, z4.h[2]");
+ COMPARE_MACRO(Smlslt(z7.VnD(), z7.VnD(), z9.VnS(), z6.VnS(), 3),
+ "smlslt z7.d, z9.s, z6.s[3]");
+ COMPARE_MACRO(Smlslt(z9.VnS(), z9.VnS(), z21.VnH(), z3.VnH(), 4),
+ "smlslt z9.s, z21.h, z3.h[4]");
+ COMPARE_MACRO(Umlalb(z9.VnD(), z9.VnD(), z1.VnS(), z11.VnS(), 0),
+ "umlalb z9.d, z1.s, z11.s[0]");
+ COMPARE_MACRO(Umlalb(z9.VnS(), z9.VnS(), z5.VnH(), z1.VnH(), 6),
+ "umlalb z9.s, z5.h, z1.h[6]");
+ COMPARE_MACRO(Umlalt(z6.VnD(), z6.VnD(), z17.VnS(), z14.VnS(), 1),
+ "umlalt z6.d, z17.s, z14.s[1]");
+ COMPARE_MACRO(Umlalt(z9.VnS(), z9.VnS(), z11.VnH(), z3.VnH(), 7),
+ "umlalt z9.s, z11.h, z3.h[7]");
+ COMPARE_MACRO(Umlslb(z12.VnD(), z12.VnD(), z15.VnS(), z9.VnS(), 2),
+ "umlslb z12.d, z15.s, z9.s[2]");
+ COMPARE_MACRO(Umlslb(z14.VnS(), z14.VnS(), z10.VnH(), z2.VnH(), 0),
+ "umlslb z14.s, z10.h, z2.h[0]");
+ COMPARE_MACRO(Umlslt(z12.VnD(), z12.VnD(), z28.VnS(), z8.VnS(), 3),
+ "umlslt z12.d, z28.s, z8.s[3]");
+ COMPARE_MACRO(Umlslt(z24.VnS(), z24.VnS(), z12.VnH(), z6.VnH(), 1),
+ "umlslt z24.s, z12.h, z6.h[1]");
+
+ COMPARE_MACRO(Umlslt(z2.VnS(), z23.VnS(), z12.VnH(), z6.VnH(), 1),
+ "movprfx z2, z23\n"
+ "umlslt z2.s, z12.h, z6.h[1]");
+ COMPARE_MACRO(Umlslt(z2.VnS(), z23.VnS(), z2.VnH(), z6.VnH(), 1),
+ "movprfx z31, z23\n"
+ "umlslt z31.s, z2.h, z6.h[1]\n"
+ "mov z2.d, z31.d");
+ COMPARE_MACRO(Umlslt(z2.VnS(), z23.VnS(), z12.VnH(), z2.VnH(), 1),
+ "movprfx z31, z23\n"
+ "umlslt z31.s, z12.h, z2.h[1]\n"
+ "mov z2.d, z31.d");
+ COMPARE_MACRO(Umlslt(z2.VnS(), z23.VnS(), z2.VnH(), z2.VnH(), 1),
+ "movprfx z31, z23\n"
+ "umlslt z31.s, z2.h, z2.h[1]\n"
+ "mov z2.d, z31.d");
+
+ CLEANUP();
+}
+
TEST(sve2_all_instructions) {
// TODO: split these instructions into more logical groups.
SETUP();
@@ -8681,22 +8736,6 @@ TEST(sve2_all_instructions) {
// <Zm>.<Tb>");
// COMPARE_PREFIX(pmullt(z31.Vn?(), z30, z26), "pmullt <Zd>.<T>, <Zn>.<Tb>,
// <Zm>.<Tb>");
- // COMPARE_PREFIX(smlalb(z11.VnD(), z29.VnS()), "smlalb z11.d, z29.s,
- // <Zm>.s[<imm>]");
- // COMPARE_PREFIX(smlalb(z18.VnS(), z17.VnH()), "smlalb z18.d, z17.s,
- // <Zm>.s[<imm>]");
- // COMPARE_PREFIX(smlalt(z10.VnS(), z30.VnH()), "smlalt z10.d, z30.s,
- // <Zm>.s[<imm>]");
- // COMPARE_PREFIX(smlalt(z23.VnD(), z31.VnS()), "smlalt z23.d, z31.s,
- // <Zm>.s[<imm>]");
- // COMPARE_PREFIX(smlslb(z12.VnD(), z23.VnS()), "smlslb z12.d, z23.s,
- // <Zm>.s[<imm>]");
- // COMPARE_PREFIX(smlslb(z5.VnS(), z4.VnH()), "smlslb z5.d, z4.s,
- // <Zm>.s[<imm>]");
- // COMPARE_PREFIX(smlslt(z7.VnD(), z9.VnS()), "smlslt z7.d, z9.s,
- // <Zm>.s[<imm>]");
- // COMPARE_PREFIX(smlslt(z9.VnS(), z21.VnH()), "smlslt z9.d, z21.s,
- // <Zm>.s[<imm>]");
// COMPARE_PREFIX(smullb(z10.VnD(), z4, z4), "smullb z10.d, z4, z4");
// COMPARE_PREFIX(smullb(z10.VnH(), z4, z4), "smullb z10.h, z4, z4");
// COMPARE_PREFIX(smullb(z10.VnS(), z4, z4), "smullb z10.s, z4, z4");
@@ -8773,22 +8812,6 @@ TEST(sve2_all_instructions) {
// <Zm>.d[<imm>]");
// COMPARE_PREFIX(sqrdmulh(z3.VnH(), z29.VnH()), "sqrdmulh z3.d, z29.d,
// <Zm>.d[<imm>]");
- // COMPARE_PREFIX(umlalb(z9.VnD(), z1.VnS()), "umlalb z9.d, z1.s,
- // <Zm>.s[<imm>]");
- // COMPARE_PREFIX(umlalb(z9.VnS(), z5.VnH()), "umlalb z9.d, z5.s,
- // <Zm>.s[<imm>]");
- // COMPARE_PREFIX(umlalt(z6.VnS(), z17.VnH()), "umlalt z6.d, z17.s,
- // <Zm>.s[<imm>]");
- // COMPARE_PREFIX(umlalt(z9.VnD(), z11.VnS()), "umlalt z9.d, z11.s,
- // <Zm>.s[<imm>]");
- // COMPARE_PREFIX(umlslb(z12.VnD(), z15.VnS()), "umlslb z12.d, z15.s,
- // <Zm>.s[<imm>]");
- // COMPARE_PREFIX(umlslb(z14.VnS(), z10.VnH()), "umlslb z14.d, z10.s,
- // <Zm>.s[<imm>]");
- // COMPARE_PREFIX(umlslt(z12.VnD(), z28.VnS()), "umlslt z12.d, z28.s,
- // <Zm>.s[<imm>]");
- // COMPARE_PREFIX(umlslt(z24.VnS(), z12.VnH()), "umlslt z24.d, z12.s,
- // <Zm>.s[<imm>]");
// COMPARE_PREFIX(umullb(z12.VnD(), z25, z22), "umullb z12.d, z25, z22");
// COMPARE_PREFIX(umullb(z12.VnH(), z25, z22), "umullb z12.h, z25, z22");
// COMPARE_PREFIX(umullb(z12.VnS(), z25, z22), "umullb z12.s, z25, z22");
diff --git a/test/aarch64/test-simulator-sve2-aarch64.cc b/test/aarch64/test-simulator-sve2-aarch64.cc
index d1f302c0..c98d6625 100644
--- a/test/aarch64/test-simulator-sve2-aarch64.cc
+++ b/test/aarch64/test-simulator-sve2-aarch64.cc
@@ -7534,5 +7534,149 @@ TEST_SVE(sve2_floating_multiply_add_long_vector) {
}
}
+TEST_SVE(sve2_mla_long_index) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x44ea8d67); // smlalt z7.d, z11.s, z10.s[1]
+ // vl128 state = 0xd08dbe24
+ __ dci(0x44ea9d2f); // umlalt z15.d, z9.s, z10.s[1]
+ // vl128 state = 0x56f6f237
+ __ dci(0x44ea9d2d); // umlalt z13.d, z9.s, z10.s[1]
+ // vl128 state = 0x00f89e4d
+ __ dci(0x44eb992f); // umlalb z15.d, z9.s, z11.s[1]
+ // vl128 state = 0xca4e469e
+ __ dci(0x44ab99ae); // umlalb z14.s, z13.h, z3.h[3]
+ // vl128 state = 0xd4b18276
+ __ dci(0x44ad99be); // umlalb z30.s, z13.h, z5.h[3]
+ // vl128 state = 0x8650a79e
+ __ dci(0x44ad99ba); // umlalb z26.s, z13.h, z5.h[3]
+ // vl128 state = 0x6fa1a501
+ __ dci(0x44adb9f2); // umlslb z18.s, z15.h, z5.h[3]
+ // vl128 state = 0x1a56a5d4
+ __ dci(0x44bda9f3); // smlslb z19.s, z15.h, z5.h[7]
+ // vl128 state = 0xfdb18057
+ __ dci(0x44b9a1fb); // smlslb z27.s, z15.h, z1.h[6]
+ // vl128 state = 0xb46b6c28
+ __ dci(0x44b8a1b3); // smlslb z19.s, z13.h, z0.h[6]
+ // vl128 state = 0x623c62c3
+ __ dci(0x44bc81b1); // smlalb z17.s, z13.h, z4.h[6]
+ // vl128 state = 0x2abab4d3
+ __ dci(0x44bc82b0); // smlalb z16.s, z21.h, z4.h[6]
+ // vl128 state = 0x7a028731
+ __ dci(0x44ac92b8); // umlalb z24.s, z21.h, z4.h[2]
+ // vl128 state = 0xf48f6936
+ __ dci(0x44a4923a); // umlalb z26.s, z17.h, z4.h[0]
+ // vl128 state = 0xbcdf888d
+ __ dci(0x44b49a3e); // umlalb z30.s, z17.h, z4.h[5]
+ // vl128 state = 0x5060778e
+ __ dci(0x44b69a1c); // umlalb z28.s, z16.h, z6.h[5]
+ // vl128 state = 0x16da3835
+ __ dci(0x44b6b218); // umlslb z24.s, z16.h, z6.h[4]
+ // vl128 state = 0xac7fb4d0
+ __ dci(0x44b2b25a); // umlslb z26.s, z18.h, z2.h[4]
+ // vl128 state = 0x8d05433b
+ __ dci(0x44b2ba0a); // umlslb z10.s, z16.h, z2.h[5]
+ // vl128 state = 0x62630101
+ __ dci(0x44b29b08); // umlalb z8.s, z24.h, z2.h[5]
+ // vl128 state = 0x31ae445b
+ __ dci(0x44b29b00); // umlalb z0.s, z24.h, z2.h[5]
+ // vl128 state = 0x539a5875
+ __ dci(0x44b29e08); // umlalt z8.s, z16.h, z2.h[5]
+ // vl128 state = 0x07d4bf73
+ __ dci(0x44b29eaa); // umlalt z10.s, z21.h, z2.h[5]
+ // vl128 state = 0x314f48a8
+ __ dci(0x44b2be2e); // umlslt z14.s, z17.h, z2.h[5]
+ // vl128 state = 0x91bd2c17
+ __ dci(0x44b2be3e); // umlslt z30.s, z17.h, z2.h[5]
+ // vl128 state = 0x4cbf4360
+ __ dci(0x44f2be7a); // umlslt z26.d, z19.s, z2.s[3]
+ // vl128 state = 0xe94e76a9
+ __ dci(0x44f2ae4a); // smlslt z10.d, z18.s, z2.s[3]
+ // vl128 state = 0xd0c2c4cc
+ __ dci(0x44faae6e); // smlslt z14.d, z19.s, z10.s[3]
+ // vl128 state = 0xc64d6839
+ __ dci(0x44faae6f); // smlslt z15.d, z19.s, z10.s[3]
+ // vl128 state = 0xa74358aa
+ __ dci(0x44faae67); // smlslt z7.d, z19.s, z10.s[3]
+ // vl128 state = 0xb8d9664b
+ __ dci(0x44fa8e57); // smlalt z23.d, z18.s, z10.s[3]
+ // vl128 state = 0xf1032ab4
+ __ dci(0x44fa8c67); // smlalt z7.d, z3.s, z10.s[3]
+ // vl128 state = 0x763732f4
+ __ dci(0x44eaac66); // smlslt z6.d, z3.s, z10.s[1]
+ // vl128 state = 0xdcf39367
+ __ dci(0x44eaa456); // smlslt z22.d, z2.s, z10.s[0]
+ // vl128 state = 0x5ea67d82
+ __ dci(0x44aea45e); // smlslt z30.s, z2.h, z6.h[2]
+ // vl128 state = 0x55da0908
+ __ dci(0x44aaa64e); // smlslt z14.s, z18.h, z2.h[2]
+ // vl128 state = 0x69d105f5
+ __ dci(0x44baa75e); // smlslt z30.s, z26.h, z2.h[6]
+ // vl128 state = 0x191bc065
+ __ dci(0x44baa75a); // smlslt z26.s, z26.h, z2.h[6]
+ // vl128 state = 0xbf62d2a0
+ __ dci(0x44eaa75b); // smlslt z27.d, z26.s, z10.s[0]
+ // vl128 state = 0x43803a21
+ __ dci(0x44eabf5f); // umlslt z31.d, z26.s, z10.s[1]
+ // vl128 state = 0x0b33725c
+ __ dci(0x44ebbd57); // umlslt z23.d, z10.s, z11.s[1]
+ // vl128 state = 0x0059a0f5
+ __ dci(0x44abbf55); // umlslt z21.s, z26.h, z3.h[3]
+ // vl128 state = 0xb587057f
+ __ dci(0x44abab5d); // smlslb z29.s, z26.h, z3.h[3]
+ // vl128 state = 0x0bfa30c6
+ __ dci(0x44abab5c); // smlslb z28.s, z26.h, z3.h[3]
+ // vl128 state = 0x151045b4
+ __ dci(0x44abaf78); // smlslt z24.s, z27.h, z3.h[3]
+ // vl128 state = 0xedb7fca9
+ __ dci(0x44aaa77c); // smlslt z28.s, z27.h, z2.h[2]
+ // vl128 state = 0xb68216f9
+ __ dci(0x44aaa178); // smlslb z24.s, z11.h, z2.h[2]
+ // vl128 state = 0x35447b11
+ __ dci(0x44aa81fa); // smlalb z26.s, z15.h, z2.h[2]
+ // vl128 state = 0xf532285f
+ __ dci(0x44aa8198); // smlalb z24.s, z12.h, z2.h[2]
+ // vl128 state = 0xd414889b
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ uint32_t expected_hashes[] = {
+ 0xd414889b,
+ 0x79d8f659,
+ 0xe2c8f06b,
+ 0x91aadf3d,
+ 0xffb92c3e,
+ 0xc2d3138e,
+ 0xdd9f4396,
+ 0xce39a88e,
+ 0xfe68a5ca,
+ 0xdcb072b2,
+ 0x3756ede6,
+ 0x5c2eef22,
+ 0x01fd02a4,
+ 0xdd8d4890,
+ 0x87500dc9,
+ 0x8c895325,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
} // namespace aarch64
} // namespace vixl