aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartyn Capewell <martyn.capewell@arm.com>2020-11-04 18:54:02 +0000
committerMartyn Capewell <martyn.capewell@arm.com>2020-11-26 17:47:55 +0000
commit1bb3f1a8004cbe61567804ac08bb4b3d2a34555d (patch)
tree531a5417503ed80a3d61180208de9e3d959cad94
parentf084401a99029b6a444dccde9c4b40b24c39c40a (diff)
[sve2] Implement indexed integer complex dot product
Implement the indexed form of the cdot instruction. Change-Id: I3184beab9bd70f58a9d3da591b5782cbdfb0ef26
-rw-r--r--src/aarch64/assembler-aarch64.h6
-rw-r--r--src/aarch64/assembler-sve-aarch64.cc30
-rw-r--r--src/aarch64/disasm-aarch64.cc4
-rw-r--r--src/aarch64/macro-assembler-aarch64.h11
-rw-r--r--src/aarch64/macro-assembler-sve-aarch64.cc22
-rw-r--r--src/aarch64/simulator-aarch64.cc56
-rw-r--r--src/aarch64/simulator-aarch64.h4
-rw-r--r--test/aarch64/test-disasm-sve-aarch64.cc45
-rw-r--r--test/aarch64/test-simulator-sve2-aarch64.cc144
9 files changed, 265 insertions, 57 deletions
diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h
index 184fced3..b61e8da1 100644
--- a/src/aarch64/assembler-aarch64.h
+++ b/src/aarch64/assembler-aarch64.h
@@ -5878,7 +5878,11 @@ class Assembler : public vixl::internal::AssemblerBase {
int rot);
// Complex integer dot product (indexed).
- void cdot(const ZRegister& zda, const ZRegister& zn);
+ void cdot(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index,
+ int rot);
// Complex integer dot product.
void cdot(const ZRegister& zda,
diff --git a/src/aarch64/assembler-sve-aarch64.cc b/src/aarch64/assembler-sve-aarch64.cc
index 666c584f..19831076 100644
--- a/src/aarch64/assembler-sve-aarch64.cc
+++ b/src/aarch64/assembler-sve-aarch64.cc
@@ -6793,17 +6793,36 @@ void Assembler::cadd(const ZRegister& zd,
Emit(0x4500d800 | rotate_bit | SVESize(zd) | Rd(zd) | Rn(zm));
}
-// This prototype maps to 2 instruction encodings:
-// cdot_z_zzzi_d
-// cdot_z_zzzi_s
-void Assembler::cdot(const ZRegister& zda, const ZRegister& zn) {
+void Assembler::cdot(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index,
+ int rot) {
// CDOT <Zda>.D, <Zn>.H, <Zm>.H[<imm>], <const>
// 0100 0100 111. .... 0100 .... .... ....
// size<23:22> | opc<20:16> | rot<11:10> | Zn<9:5> | Zda<4:0>
VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD());
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4));
+ VIXL_ASSERT(index >= 0);
+
+ Instr zm_and_idx = 0;
+ if (zm.IsLaneSizeB()) {
+ // Zm<18:16> | i2<20:19>
+ VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 3));
+ zm_and_idx = (index << 19) | Rx<18, 16>(zm);
+ } else {
+ // Zm<19:16> | i1<20>
+ VIXL_ASSERT(zm.IsLaneSizeH());
+ VIXL_ASSERT((zm.GetCode() <= 15) && (index <= 1));
+ zm_and_idx = (index << 20) | Rx<19, 16>(zm);
+ }
- Emit(0x44e04000 | Rd(zda) | Rn(zn));
+ Instr rotate_bits = (rot / 90) << 10;
+ Emit(0x44a04000 | zm_and_idx | rotate_bits | SVESize(zda) | Rd(zda) | Rn(zn));
}
void Assembler::cdot(const ZRegister& zda,
@@ -6821,7 +6840,6 @@ void Assembler::cdot(const ZRegister& zda,
VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4));
Instr rotate_bits = (rot / 90) << 10;
-
Emit(0x44001000 | rotate_bits | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
}
diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc
index 2900719e..3be2049b 100644
--- a/src/aarch64/disasm-aarch64.cc
+++ b/src/aarch64/disasm-aarch64.cc
@@ -10051,7 +10051,7 @@ void Disassembler::Disassemble_ZdaD_ZnD_ZmD_imm(const Instruction *instr) {
void Disassembler::Disassemble_ZdaD_ZnH_ZmH_imm_const(
const Instruction *instr) {
- const char *form = "'Zd.d, 'Zn.h, <Zm>.h[<imm>], <const>";
+ const char *form = "'Zd.d, 'Zn.h, z'u1916.h['u2020], #'u1110*90";
Format(instr, mnemonic_.c_str(), form);
}
@@ -10073,7 +10073,7 @@ void Disassembler::Disassemble_ZdaH_ZnH_ZmH_imm_const(
void Disassembler::Disassemble_ZdaS_ZnB_ZmB_imm_const(
const Instruction *instr) {
- const char *form = "'Zd.s, 'Zn.b, <Zm>.b[<imm>], <const>";
+ const char *form = "'Zd.s, 'Zn.b, z'u1816.b['u2019], #'u1110*90";
Format(instr, mnemonic_.c_str(), form);
}
diff --git a/src/aarch64/macro-assembler-aarch64.h b/src/aarch64/macro-assembler-aarch64.h
index 9c03b3d0..2cd1153e 100644
--- a/src/aarch64/macro-assembler-aarch64.h
+++ b/src/aarch64/macro-assembler-aarch64.h
@@ -6430,11 +6430,12 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
const ZRegister& zn,
const ZRegister& zm,
int rot);
- void Cdot(const ZRegister& zda, const ZRegister& zn) {
- VIXL_ASSERT(allow_macro_instructions_);
- SingleEmissionCheckScope guard(this);
- cdot(zda, zn);
- }
+ void Cdot(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index,
+ int rot);
void Cdot(const ZRegister& zd,
const ZRegister& za,
const ZRegister& zn,
diff --git a/src/aarch64/macro-assembler-sve-aarch64.cc b/src/aarch64/macro-assembler-sve-aarch64.cc
index db5d03b7..8cb3f1d1 100644
--- a/src/aarch64/macro-assembler-sve-aarch64.cc
+++ b/src/aarch64/macro-assembler-sve-aarch64.cc
@@ -1770,6 +1770,28 @@ void MacroAssembler::Cdot(const ZRegister& zd,
const ZRegister& za,
const ZRegister& zn,
const ZRegister& zm,
+ int index,
+ int rot) {
+  // This doesn't handle zm when it's out of the range that can be encoded in
+  // the instruction. The range depends on element size: z0-z7 for B, z0-z15 for H.
+ if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) {
+ UseScratchRegisterScope temps(this);
+ ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd);
+ {
+ MovprfxHelperScope guard(this, ztmp, za);
+ cdot(ztmp, zn, zm, index, rot);
+ }
+ Mov(zd, ztmp);
+ } else {
+ MovprfxHelperScope guard(this, zd, za);
+ cdot(zd, zn, zm, index, rot);
+ }
+}
+
+void MacroAssembler::Cdot(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
int rot) {
if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) {
UseScratchRegisterScope temps(this);
diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc
index c0b6028f..598a334e 100644
--- a/src/aarch64/simulator-aarch64.cc
+++ b/src/aarch64/simulator-aarch64.cc
@@ -82,9 +82,9 @@ Simulator::FormToVisitorFnMap Simulator::form_to_visitor_ = {
{"bsl2n_z_zzz", &Simulator::SimulateSVEBitwiseTernary},
{"bsl_z_zzz", &Simulator::SimulateSVEBitwiseTernary},
{"cadd_z_zz", &Simulator::Simulate_ZdnT_ZdnT_ZmT_const},
- {"cdot_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb_const},
- {"cdot_z_zzzi_d", &Simulator::Simulate_ZdaD_ZnH_ZmH_imm_const},
- {"cdot_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnB_ZmB_imm_const},
+ {"cdot_z_zzz", &Simulator::SimulateSVEComplexDotProduct},
+ {"cdot_z_zzzi_d", &Simulator::SimulateSVEComplexDotProduct},
+ {"cdot_z_zzzi_s", &Simulator::SimulateSVEComplexDotProduct},
{"cmla_z_zzz", &Simulator::Simulate_ZdaT_ZnT_ZmT_const},
{"cmla_z_zzzi_h", &Simulator::Simulate_ZdaH_ZnH_ZmH_imm_const},
{"cmla_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnS_ZmS_imm_const},
@@ -2723,21 +2723,6 @@ void Simulator::Simulate_ZdaD_ZnD_ZmD_imm(const Instruction* instr) {
}
}
-void Simulator::Simulate_ZdaD_ZnH_ZmH_imm_const(const Instruction* instr) {
- SimVRegister& zda = ReadVRegister(instr->GetRd());
- USE(zda);
- SimVRegister& zn = ReadVRegister(instr->GetRn());
- USE(zn);
-
- switch (form_hash_) {
- case Hash("cdot_z_zzzi_d"):
- VIXL_UNIMPLEMENTED();
- break;
- default:
- VIXL_UNIMPLEMENTED();
- }
-}
-
void Simulator::Simulate_ZdaD_ZnS_ZmS_imm(const Instruction* instr) {
SimVRegister& zda = ReadVRegister(instr->GetRd());
USE(zda);
@@ -2828,21 +2813,6 @@ void Simulator::Simulate_ZdaH_ZnH_ZmH_imm_const(const Instruction* instr) {
}
}
-void Simulator::Simulate_ZdaS_ZnB_ZmB_imm_const(const Instruction* instr) {
- SimVRegister& zda = ReadVRegister(instr->GetRd());
- USE(zda);
- SimVRegister& zn = ReadVRegister(instr->GetRn());
- USE(zn);
-
- switch (form_hash_) {
- case Hash("cdot_z_zzzi_s"):
- VIXL_UNIMPLEMENTED();
- break;
- default:
- VIXL_UNIMPLEMENTED();
- }
-}
-
void Simulator::Simulate_ZdaS_ZnH_ZmH(const Instruction* instr) {
SimVRegister& zda = ReadVRegister(instr->GetRd());
USE(zda);
@@ -3150,20 +3120,34 @@ void Simulator::Simulate_ZdaT_ZnTb_ZmTb(const Instruction* instr) {
}
}
-void Simulator::Simulate_ZdaT_ZnTb_ZmTb_const(const Instruction* instr) {
+void Simulator::SimulateSVEComplexDotProduct(const Instruction* instr) {
VectorFormat vform = instr->GetSVEVectorFormat();
SimVRegister& zda = ReadVRegister(instr->GetRd());
- SimVRegister& zm = ReadVRegister(instr->GetRm());
SimVRegister& zn = ReadVRegister(instr->GetRn());
int rot = instr->ExtractBits(11, 10) * 90;
+ unsigned zm_code = instr->GetRm();
+ int index = -1;
switch (form_hash_) {
case Hash("cdot_z_zzz"):
- cdot(vform, zda, zda, zn, zm, rot);
+ // Nothing to do.
+ break;
+ case Hash("cdot_z_zzzi_s"):
+ index = zm_code >> 3;
+ zm_code &= 0x7;
+ break;
+ case Hash("cdot_z_zzzi_d"):
+ index = zm_code >> 4;
+ zm_code &= 0xf;
break;
default:
VIXL_UNIMPLEMENTED();
}
+
+ SimVRegister temp;
+ SimVRegister& zm = ReadVRegister(zm_code);
+ if (index >= 0) dup_elements_to_segments(vform, temp, zm, index);
+ cdot(vform, zda, zda, zn, (index >= 0) ? temp : zm, rot);
}
void Simulator::SimulateSVEBitwiseTernary(const Instruction* instr) {
diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h
index 86ea6859..ac5af8b8 100644
--- a/src/aarch64/simulator-aarch64.h
+++ b/src/aarch64/simulator-aarch64.h
@@ -1187,11 +1187,9 @@ class Simulator : public DecoderVisitor {
void Simulate_ZdT_ZnT_ZmTb(const Instruction* instr);
void Simulate_ZdT_ZnT_const(const Instruction* instr);
void Simulate_ZdaD_ZnD_ZmD_imm(const Instruction* instr);
- void Simulate_ZdaD_ZnH_ZmH_imm_const(const Instruction* instr);
void Simulate_ZdaD_ZnS_ZmS_imm(const Instruction* instr);
void Simulate_ZdaH_ZnH_ZmH_imm(const Instruction* instr);
void Simulate_ZdaH_ZnH_ZmH_imm_const(const Instruction* instr);
- void Simulate_ZdaS_ZnB_ZmB_imm_const(const Instruction* instr);
void Simulate_ZdaS_ZnH_ZmH(const Instruction* instr);
void Simulate_ZdaS_ZnH_ZmH_imm(const Instruction* instr);
void Simulate_ZdaS_ZnS_ZmS_imm(const Instruction* instr);
@@ -1201,7 +1199,6 @@ class Simulator : public DecoderVisitor {
void Simulate_ZdaT_ZnT_ZmT_const(const Instruction* instr);
void Simulate_ZdaT_ZnT_const(const Instruction* instr);
void Simulate_ZdaT_ZnTb_ZmTb(const Instruction* instr);
- void Simulate_ZdaT_ZnTb_ZmTb_const(const Instruction* instr);
void Simulate_ZdnT_PgM_ZdnT_ZmT(const Instruction* instr);
void Simulate_ZdnT_PgM_ZdnT_const(const Instruction* instr);
void Simulate_ZdnT_ZdnT_ZmT_const(const Instruction* instr);
@@ -1222,6 +1219,7 @@ class Simulator : public DecoderVisitor {
void SimulateSVESaturatingIntMulLongIdx(const Instruction* instr);
void SimulateSVEExclusiveOrRotate(const Instruction* instr);
void SimulateSVEBitwiseTernary(const Instruction* instr);
+ void SimulateSVEComplexDotProduct(const Instruction* instr);
// Integer register accessors.
diff --git a/test/aarch64/test-disasm-sve-aarch64.cc b/test/aarch64/test-disasm-sve-aarch64.cc
index 1e46ec9d..58b1cfb8 100644
--- a/test/aarch64/test-disasm-sve-aarch64.cc
+++ b/test/aarch64/test-disasm-sve-aarch64.cc
@@ -8058,6 +8058,47 @@ TEST(sve2_cdot) {
"movprfx z0, z1\n"
"cdot z0.s, z31.b, z31.b, #0");
+ COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 0, 0),
+ "cdot z18.s, z26.b, z7.b[0], #0");
+ COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 1, 0),
+ "cdot z18.s, z26.b, z7.b[1], #0");
+ COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 2, 0),
+ "cdot z18.s, z26.b, z7.b[2], #0");
+ COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 3, 0),
+ "cdot z18.s, z26.b, z7.b[3], #0");
+ COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 2, 90),
+ "cdot z18.s, z26.b, z7.b[2], #90");
+ COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 2, 180),
+ "cdot z18.s, z26.b, z7.b[2], #180");
+ COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 2, 270),
+ "cdot z18.s, z26.b, z7.b[2], #270");
+ COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 0, 0),
+ "cdot z5.d, z7.h, z1.h[0], #0");
+ COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 1, 0),
+ "cdot z5.d, z7.h, z1.h[1], #0");
+ COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 1, 90),
+ "cdot z5.d, z7.h, z1.h[1], #90");
+ COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 1, 180),
+ "cdot z5.d, z7.h, z1.h[1], #180");
+ COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 1, 270),
+ "cdot z5.d, z7.h, z1.h[1], #270");
+
+ COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z2.VnB(), z3.VnB(), 0, 0),
+ "movprfx z0, z1\n"
+ "cdot z0.s, z2.b, z3.b[0], #0");
+ COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z0.VnB(), z3.VnB(), 1, 90),
+ "movprfx z31, z1\n"
+ "cdot z31.s, z0.b, z3.b[1], #90\n"
+ "mov z0.d, z31.d");
+ COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z2.VnB(), z0.VnB(), 2, 180),
+ "movprfx z31, z1\n"
+ "cdot z31.s, z2.b, z0.b[2], #180\n"
+ "mov z0.d, z31.d");
+ COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z0.VnB(), z0.VnB(), 3, 270),
+ "movprfx z31, z1\n"
+ "cdot z31.s, z0.b, z0.b[3], #270\n"
+ "mov z0.d, z31.d");
+
CLEANUP();
}
@@ -8178,10 +8219,6 @@ TEST(sve2_all_instructions) {
// TODO: split these instructions into more logical groups.
SETUP();
- // COMPARE_PREFIX(cdot(z18.VnS(), z26.VnB()), "cdot z18.d, z26.h,
- // <Zm>.h[<imm>], <const>");
- // COMPARE_PREFIX(cdot(z5.VnD(), z7.VnH()), "cdot z5.d, z7.h, <Zm>.h[<imm>],
- // <const>");
// COMPARE_PREFIX(cmla(z17.VnS(), z29.VnS()), "cmla z17.h, z29.h,
// <Zm>.h[<imm>], <const>");
// COMPARE_PREFIX(cmla(z18.VnH(), z22.VnH()), "cmla z18.h, z22.h,
diff --git a/test/aarch64/test-simulator-sve2-aarch64.cc b/test/aarch64/test-simulator-sve2-aarch64.cc
index 86fd7114..26e29f30 100644
--- a/test/aarch64/test-simulator-sve2-aarch64.cc
+++ b/test/aarch64/test-simulator-sve2-aarch64.cc
@@ -5986,5 +5986,149 @@ TEST_SVE(sve2_while) {
}
}
+TEST_SVE(sve2_cdot_index) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x44bb4ef6); // cdot z22.s, z23.b, z3.b[3], #270
+ // vl128 state = 0x452d1d6e
+ __ dci(0x44b94ff7); // cdot z23.s, z31.b, z1.b[3], #270
+ // vl128 state = 0x546c9569
+ __ dci(0x44b94dd5); // cdot z21.s, z14.b, z1.b[3], #270
+ // vl128 state = 0xa2abf834
+ __ dci(0x44bd45d7); // cdot z23.s, z14.b, z5.b[3], #90
+ // vl128 state = 0xba77ed64
+ __ dci(0x44fc45df); // cdot z31.d, z14.h, z12.h[1], #90
+ // vl128 state = 0xe78163f2
+ __ dci(0x44f441db); // cdot z27.d, z14.h, z4.h[1], #0
+ // vl128 state = 0xca3b116d
+ __ dci(0x44f44dd3); // cdot z19.d, z14.h, z4.h[1], #270
+ // vl128 state = 0x57ba3771
+ __ dci(0x44b44d83); // cdot z3.s, z12.b, z4.b[2], #270
+ // vl128 state = 0x4edccb88
+ __ dci(0x44ac4d82); // cdot z2.s, z12.b, z4.b[1], #270
+ // vl128 state = 0xc9543499
+ __ dci(0x44a84f8a); // cdot z10.s, z28.b, z0.b[1], #270
+ // vl128 state = 0x9d8fe439
+ __ dci(0x44a84d08); // cdot z8.s, z8.b, z0.b[1], #270
+ // vl128 state = 0x3c1bf0cc
+ __ dci(0x44ba4d09); // cdot z9.s, z8.b, z2.b[3], #270
+ // vl128 state = 0x983716f1
+ __ dci(0x44ea4d0d); // cdot z13.d, z8.h, z10.h[0], #270
+ // vl128 state = 0x2df96300
+ __ dci(0x44eb491d); // cdot z29.d, z8.h, z11.h[0], #180
+ // vl128 state = 0xc23edde3
+ __ dci(0x44e9499f); // cdot z31.d, z12.h, z9.h[0], #180
+ // vl128 state = 0xef0ace9d
+ __ dci(0x44e84b9d); // cdot z29.d, z28.h, z8.h[0], #180
+ // vl128 state = 0x2cce8002
+ __ dci(0x44e84b99); // cdot z25.d, z28.h, z8.h[0], #180
+ // vl128 state = 0xd07f46a1
+ __ dci(0x44f84a9d); // cdot z29.d, z20.h, z8.h[1], #180
+ // vl128 state = 0x239831e8
+ __ dci(0x44f84a99); // cdot z25.d, z20.h, z8.h[1], #180
+ // vl128 state = 0xa110988d
+ __ dci(0x44e84a09); // cdot z9.d, z16.h, z8.h[0], #180
+ // vl128 state = 0x2b9ef292
+ __ dci(0x44e84a19); // cdot z25.d, z16.h, z8.h[0], #180
+ // vl128 state = 0x50eeb818
+ __ dci(0x44e04b1b); // cdot z27.d, z24.h, z0.h[0], #180
+ // vl128 state = 0xc33ce03b
+ __ dci(0x44e04a2b); // cdot z11.d, z17.h, z0.h[0], #180
+ // vl128 state = 0xe163b5c9
+ __ dci(0x44e04b0f); // cdot z15.d, z24.h, z0.h[0], #180
+ // vl128 state = 0x052a34eb
+ __ dci(0x44e04b1f); // cdot z31.d, z24.h, z0.h[0], #180
+ // vl128 state = 0x0660afb4
+ __ dci(0x44e84b4f); // cdot z15.d, z26.h, z8.h[0], #180
+ // vl128 state = 0x0ae01233
+ __ dci(0x44ee4b4e); // cdot z14.d, z26.h, z14.h[0], #180
+ // vl128 state = 0xde7bdd15
+ __ dci(0x44ae4b7e); // cdot z30.s, z27.b, z6.b[1], #180
+ // vl128 state = 0x758973a1
+ __ dci(0x44a6497f); // cdot z31.s, z11.b, z6.b[0], #180
+ // vl128 state = 0xb3c5df37
+ __ dci(0x44a64df7); // cdot z23.s, z15.b, z6.b[0], #270
+ // vl128 state = 0xe652f054
+ __ dci(0x44a64c73); // cdot z19.s, z3.b, z6.b[0], #270
+ // vl128 state = 0xc4b58041
+ __ dci(0x44a64de3); // cdot z3.s, z15.b, z6.b[0], #270
+ // vl128 state = 0x1239ca90
+ __ dci(0x44a749e2); // cdot z2.s, z15.b, z7.b[0], #180
+ // vl128 state = 0x4a01cdcb
+ __ dci(0x44a740e0); // cdot z0.s, z7.b, z7.b[0], #0
+ // vl128 state = 0x604e45cf
+ __ dci(0x44a344e2); // cdot z2.s, z7.b, z3.b[0], #90
+ // vl128 state = 0x12fe2972
+ __ dci(0x44a34ca3); // cdot z3.s, z5.b, z3.b[0], #270
+ // vl128 state = 0x78e0bb2e
+ __ dci(0x44e14cb3); // cdot z19.d, z5.h, z1.h[0], #270
+ // vl128 state = 0xe3a69b46
+ __ dci(0x44e14d31); // cdot z17.d, z9.h, z1.h[0], #270
+ // vl128 state = 0xe6b58aa4
+ __ dci(0x44f14d01); // cdot z1.d, z8.h, z1.h[1], #270
+ // vl128 state = 0xffcfb597
+ __ dci(0x44f14551); // cdot z17.d, z10.h, z1.h[1], #90
+ // vl128 state = 0x2745934b
+ __ dci(0x44f345d5); // cdot z21.d, z14.h, z3.h[1], #90
+ // vl128 state = 0xa38b5571
+ __ dci(0x44f34574); // cdot z20.d, z11.h, z3.h[1], #90
+ // vl128 state = 0x978afd92
+ __ dci(0x44f34576); // cdot z22.d, z11.h, z3.h[1], #90
+ // vl128 state = 0x9f1b19c9
+ __ dci(0x44f34f77); // cdot z23.d, z27.h, z3.h[1], #270
+ // vl128 state = 0x61a31d64
+ __ dci(0x44f24f5f); // cdot z31.d, z26.h, z2.h[1], #270
+ // vl128 state = 0x1e71023e
+ __ dci(0x44fa4fcf); // cdot z15.d, z30.h, z10.h[1], #270
+ // vl128 state = 0xdbe5ffb3
+ __ dci(0x44ba4f4e); // cdot z14.s, z26.b, z2.b[3], #270
+ // vl128 state = 0x51390e81
+ __ dci(0x44ba470c); // cdot z12.s, z24.b, z2.b[3], #90
+ // vl128 state = 0x59ad5198
+ __ dci(0x44b2479c); // cdot z28.s, z28.b, z2.b[2], #90
+ // vl128 state = 0xe997de49
+ __ dci(0x44b24fbd); // cdot z29.s, z29.b, z2.b[2], #270
+ // vl128 state = 0x5533cefa
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ uint32_t expected_hashes[] = {
+ 0x5533cefa,
+ 0x1462a298,
+ 0x1acb4ead,
+ 0xeb05ddf0,
+ 0x23fe8c86,
+ 0xbb1e9f8c,
+ 0x4a933f43,
+ 0x4cd64b55,
+ 0x84a4b8b7,
+ 0x52019619,
+ 0x4442432b,
+ 0x9b353ce8,
+ 0x333c9eef,
+ 0x291eac87,
+ 0x110f7371,
+ 0x009b25cb,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
} // namespace aarch64
} // namespace vixl