diff options
author | Martyn Capewell <martyn.capewell@arm.com> | 2020-11-04 18:54:02 +0000 |
---|---|---|
committer | Martyn Capewell <martyn.capewell@arm.com> | 2020-11-26 17:47:55 +0000 |
commit | 1bb3f1a8004cbe61567804ac08bb4b3d2a34555d (patch) | |
tree | 531a5417503ed80a3d61180208de9e3d959cad94 | |
parent | f084401a99029b6a444dccde9c4b40b24c39c40a (diff) |
[sve2] Implement indexed integer complex dot product
Implement the indexed form of the cdot instruction.
Change-Id: I3184beab9bd70f58a9d3da591b5782cbdfb0ef26
-rw-r--r-- | src/aarch64/assembler-aarch64.h | 6 | ||||
-rw-r--r-- | src/aarch64/assembler-sve-aarch64.cc | 30 | ||||
-rw-r--r-- | src/aarch64/disasm-aarch64.cc | 4 | ||||
-rw-r--r-- | src/aarch64/macro-assembler-aarch64.h | 11 | ||||
-rw-r--r-- | src/aarch64/macro-assembler-sve-aarch64.cc | 22 | ||||
-rw-r--r-- | src/aarch64/simulator-aarch64.cc | 56 | ||||
-rw-r--r-- | src/aarch64/simulator-aarch64.h | 4 | ||||
-rw-r--r-- | test/aarch64/test-disasm-sve-aarch64.cc | 45 | ||||
-rw-r--r-- | test/aarch64/test-simulator-sve2-aarch64.cc | 144 |
9 files changed, 265 insertions, 57 deletions
diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h index 184fced3..b61e8da1 100644 --- a/src/aarch64/assembler-aarch64.h +++ b/src/aarch64/assembler-aarch64.h @@ -5878,7 +5878,11 @@ class Assembler : public vixl::internal::AssemblerBase { int rot); // Complex integer dot product (indexed). - void cdot(const ZRegister& zda, const ZRegister& zn); + void cdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot); // Complex integer dot product. void cdot(const ZRegister& zda, diff --git a/src/aarch64/assembler-sve-aarch64.cc b/src/aarch64/assembler-sve-aarch64.cc index 666c584f..19831076 100644 --- a/src/aarch64/assembler-sve-aarch64.cc +++ b/src/aarch64/assembler-sve-aarch64.cc @@ -6793,17 +6793,36 @@ void Assembler::cadd(const ZRegister& zd, Emit(0x4500d800 | rotate_bit | SVESize(zd) | Rd(zd) | Rn(zm)); } -// This prototype maps to 2 instruction encodings: -// cdot_z_zzzi_d -// cdot_z_zzzi_s -void Assembler::cdot(const ZRegister& zda, const ZRegister& zn) { +void Assembler::cdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot) { // CDOT <Zda>.D, <Zn>.H, <Zm>.H[<imm>], <const> // 0100 0100 111. .... 0100 .... .... .... 
// size<23:22> | opc<20:16> | rot<11:10> | Zn<9:5> | Zda<4:0> VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4)); + VIXL_ASSERT(index >= 0); + + Instr zm_and_idx = 0; + if (zm.IsLaneSizeB()) { + // Zm<18:16> | i2<20:19> + VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 3)); + zm_and_idx = (index << 19) | Rx<18, 16>(zm); + } else { + // Zm<19:16> | i1<20> + VIXL_ASSERT(zm.IsLaneSizeH()); + VIXL_ASSERT((zm.GetCode() <= 15) && (index <= 1)); + zm_and_idx = (index << 20) | Rx<19, 16>(zm); + } - Emit(0x44e04000 | Rd(zda) | Rn(zn)); + Instr rotate_bits = (rot / 90) << 10; + Emit(0x44a04000 | zm_and_idx | rotate_bits | SVESize(zda) | Rd(zda) | Rn(zn)); } void Assembler::cdot(const ZRegister& zda, @@ -6821,7 +6840,6 @@ void Assembler::cdot(const ZRegister& zda, VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4)); Instr rotate_bits = (rot / 90) << 10; - Emit(0x44001000 | rotate_bits | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); } diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc index 2900719e..3be2049b 100644 --- a/src/aarch64/disasm-aarch64.cc +++ b/src/aarch64/disasm-aarch64.cc @@ -10051,7 +10051,7 @@ void Disassembler::Disassemble_ZdaD_ZnD_ZmD_imm(const Instruction *instr) { void Disassembler::Disassemble_ZdaD_ZnH_ZmH_imm_const( const Instruction *instr) { - const char *form = "'Zd.d, 'Zn.h, <Zm>.h[<imm>], <const>"; + const char *form = "'Zd.d, 'Zn.h, z'u1916.h['u2020], #'u1110*90"; Format(instr, mnemonic_.c_str(), form); } @@ -10073,7 +10073,7 @@ void Disassembler::Disassemble_ZdaH_ZnH_ZmH_imm_const( void Disassembler::Disassemble_ZdaS_ZnB_ZmB_imm_const( const Instruction *instr) { - const char *form = "'Zd.s, 'Zn.b, <Zm>.b[<imm>], <const>"; + const char *form = "'Zd.s, 'Zn.b, z'u1816.b['u2019], 
#'u1110*90"; Format(instr, mnemonic_.c_str(), form); } diff --git a/src/aarch64/macro-assembler-aarch64.h b/src/aarch64/macro-assembler-aarch64.h index 9c03b3d0..2cd1153e 100644 --- a/src/aarch64/macro-assembler-aarch64.h +++ b/src/aarch64/macro-assembler-aarch64.h @@ -6430,11 +6430,12 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { const ZRegister& zn, const ZRegister& zm, int rot); - void Cdot(const ZRegister& zda, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - cdot(zda, zn); - } + void Cdot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot); void Cdot(const ZRegister& zd, const ZRegister& za, const ZRegister& zn, diff --git a/src/aarch64/macro-assembler-sve-aarch64.cc b/src/aarch64/macro-assembler-sve-aarch64.cc index db5d03b7..8cb3f1d1 100644 --- a/src/aarch64/macro-assembler-sve-aarch64.cc +++ b/src/aarch64/macro-assembler-sve-aarch64.cc @@ -1770,6 +1770,28 @@ void MacroAssembler::Cdot(const ZRegister& zd, const ZRegister& za, const ZRegister& zn, const ZRegister& zm, + int index, + int rot) { + // This doesn't handle zm when it's out of the range that can be encoded in + // instruction. The range depends on element size: z0-z7 for B, z0-z15 for H. 
+ if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { + UseScratchRegisterScope temps(this); + ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, ztmp, za); + cdot(ztmp, zn, zm, index, rot); + } + Mov(zd, ztmp); + } else { + MovprfxHelperScope guard(this, zd, za); + cdot(zd, zn, zm, index, rot); + } +} + +void MacroAssembler::Cdot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, int rot) { if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { UseScratchRegisterScope temps(this); diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc index c0b6028f..598a334e 100644 --- a/src/aarch64/simulator-aarch64.cc +++ b/src/aarch64/simulator-aarch64.cc @@ -82,9 +82,9 @@ Simulator::FormToVisitorFnMap Simulator::form_to_visitor_ = { {"bsl2n_z_zzz", &Simulator::SimulateSVEBitwiseTernary}, {"bsl_z_zzz", &Simulator::SimulateSVEBitwiseTernary}, {"cadd_z_zz", &Simulator::Simulate_ZdnT_ZdnT_ZmT_const}, - {"cdot_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb_const}, - {"cdot_z_zzzi_d", &Simulator::Simulate_ZdaD_ZnH_ZmH_imm_const}, - {"cdot_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnB_ZmB_imm_const}, + {"cdot_z_zzz", &Simulator::SimulateSVEComplexDotProduct}, + {"cdot_z_zzzi_d", &Simulator::SimulateSVEComplexDotProduct}, + {"cdot_z_zzzi_s", &Simulator::SimulateSVEComplexDotProduct}, {"cmla_z_zzz", &Simulator::Simulate_ZdaT_ZnT_ZmT_const}, {"cmla_z_zzzi_h", &Simulator::Simulate_ZdaH_ZnH_ZmH_imm_const}, {"cmla_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnS_ZmS_imm_const}, @@ -2723,21 +2723,6 @@ void Simulator::Simulate_ZdaD_ZnD_ZmD_imm(const Instruction* instr) { } } -void Simulator::Simulate_ZdaD_ZnH_ZmH_imm_const(const Instruction* instr) { - SimVRegister& zda = ReadVRegister(instr->GetRd()); - USE(zda); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - USE(zn); - - switch (form_hash_) { - case Hash("cdot_z_zzzi_d"): - VIXL_UNIMPLEMENTED(); - break; - default: - 
VIXL_UNIMPLEMENTED(); - } -} - void Simulator::Simulate_ZdaD_ZnS_ZmS_imm(const Instruction* instr) { SimVRegister& zda = ReadVRegister(instr->GetRd()); USE(zda); @@ -2828,21 +2813,6 @@ void Simulator::Simulate_ZdaH_ZnH_ZmH_imm_const(const Instruction* instr) { } } -void Simulator::Simulate_ZdaS_ZnB_ZmB_imm_const(const Instruction* instr) { - SimVRegister& zda = ReadVRegister(instr->GetRd()); - USE(zda); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - USE(zn); - - switch (form_hash_) { - case Hash("cdot_z_zzzi_s"): - VIXL_UNIMPLEMENTED(); - break; - default: - VIXL_UNIMPLEMENTED(); - } -} - void Simulator::Simulate_ZdaS_ZnH_ZmH(const Instruction* instr) { SimVRegister& zda = ReadVRegister(instr->GetRd()); USE(zda); @@ -3150,20 +3120,34 @@ void Simulator::Simulate_ZdaT_ZnTb_ZmTb(const Instruction* instr) { } } -void Simulator::Simulate_ZdaT_ZnTb_ZmTb_const(const Instruction* instr) { +void Simulator::SimulateSVEComplexDotProduct(const Instruction* instr) { VectorFormat vform = instr->GetSVEVectorFormat(); SimVRegister& zda = ReadVRegister(instr->GetRd()); - SimVRegister& zm = ReadVRegister(instr->GetRm()); SimVRegister& zn = ReadVRegister(instr->GetRn()); int rot = instr->ExtractBits(11, 10) * 90; + unsigned zm_code = instr->GetRm(); + int index = -1; switch (form_hash_) { case Hash("cdot_z_zzz"): - cdot(vform, zda, zda, zn, zm, rot); + // Nothing to do. + break; + case Hash("cdot_z_zzzi_s"): + index = zm_code >> 3; + zm_code &= 0x7; + break; + case Hash("cdot_z_zzzi_d"): + index = zm_code >> 4; + zm_code &= 0xf; break; default: VIXL_UNIMPLEMENTED(); } + + SimVRegister temp; + SimVRegister& zm = ReadVRegister(zm_code); + if (index >= 0) dup_elements_to_segments(vform, temp, zm, index); + cdot(vform, zda, zda, zn, (index >= 0) ? 
temp : zm, rot); } void Simulator::SimulateSVEBitwiseTernary(const Instruction* instr) { diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h index 86ea6859..ac5af8b8 100644 --- a/src/aarch64/simulator-aarch64.h +++ b/src/aarch64/simulator-aarch64.h @@ -1187,11 +1187,9 @@ class Simulator : public DecoderVisitor { void Simulate_ZdT_ZnT_ZmTb(const Instruction* instr); void Simulate_ZdT_ZnT_const(const Instruction* instr); void Simulate_ZdaD_ZnD_ZmD_imm(const Instruction* instr); - void Simulate_ZdaD_ZnH_ZmH_imm_const(const Instruction* instr); void Simulate_ZdaD_ZnS_ZmS_imm(const Instruction* instr); void Simulate_ZdaH_ZnH_ZmH_imm(const Instruction* instr); void Simulate_ZdaH_ZnH_ZmH_imm_const(const Instruction* instr); - void Simulate_ZdaS_ZnB_ZmB_imm_const(const Instruction* instr); void Simulate_ZdaS_ZnH_ZmH(const Instruction* instr); void Simulate_ZdaS_ZnH_ZmH_imm(const Instruction* instr); void Simulate_ZdaS_ZnS_ZmS_imm(const Instruction* instr); @@ -1201,7 +1199,6 @@ class Simulator : public DecoderVisitor { void Simulate_ZdaT_ZnT_ZmT_const(const Instruction* instr); void Simulate_ZdaT_ZnT_const(const Instruction* instr); void Simulate_ZdaT_ZnTb_ZmTb(const Instruction* instr); - void Simulate_ZdaT_ZnTb_ZmTb_const(const Instruction* instr); void Simulate_ZdnT_PgM_ZdnT_ZmT(const Instruction* instr); void Simulate_ZdnT_PgM_ZdnT_const(const Instruction* instr); void Simulate_ZdnT_ZdnT_ZmT_const(const Instruction* instr); @@ -1222,6 +1219,7 @@ class Simulator : public DecoderVisitor { void SimulateSVESaturatingIntMulLongIdx(const Instruction* instr); void SimulateSVEExclusiveOrRotate(const Instruction* instr); void SimulateSVEBitwiseTernary(const Instruction* instr); + void SimulateSVEComplexDotProduct(const Instruction* instr); // Integer register accessors. 
diff --git a/test/aarch64/test-disasm-sve-aarch64.cc b/test/aarch64/test-disasm-sve-aarch64.cc index 1e46ec9d..58b1cfb8 100644 --- a/test/aarch64/test-disasm-sve-aarch64.cc +++ b/test/aarch64/test-disasm-sve-aarch64.cc @@ -8058,6 +8058,47 @@ TEST(sve2_cdot) { "movprfx z0, z1\n" "cdot z0.s, z31.b, z31.b, #0"); + COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 0, 0), + "cdot z18.s, z26.b, z7.b[0], #0"); + COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 1, 0), + "cdot z18.s, z26.b, z7.b[1], #0"); + COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 2, 0), + "cdot z18.s, z26.b, z7.b[2], #0"); + COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 3, 0), + "cdot z18.s, z26.b, z7.b[3], #0"); + COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 2, 90), + "cdot z18.s, z26.b, z7.b[2], #90"); + COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 2, 180), + "cdot z18.s, z26.b, z7.b[2], #180"); + COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 2, 270), + "cdot z18.s, z26.b, z7.b[2], #270"); + COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 0, 0), + "cdot z5.d, z7.h, z1.h[0], #0"); + COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 1, 0), + "cdot z5.d, z7.h, z1.h[1], #0"); + COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 1, 90), + "cdot z5.d, z7.h, z1.h[1], #90"); + COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 1, 180), + "cdot z5.d, z7.h, z1.h[1], #180"); + COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 1, 270), + "cdot z5.d, z7.h, z1.h[1], #270"); + + COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z2.VnB(), z3.VnB(), 0, 0), + "movprfx z0, z1\n" + "cdot z0.s, z2.b, z3.b[0], #0"); + COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z0.VnB(), z3.VnB(), 1, 90), + "movprfx z31, z1\n" + "cdot z31.s, z0.b, z3.b[1], #90\n" + "mov z0.d, z31.d"); + COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z2.VnB(), z0.VnB(), 2, 180), + "movprfx z31, z1\n" + "cdot z31.s, z2.b, z0.b[2], 
#180\n" + "mov z0.d, z31.d"); + COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z0.VnB(), z0.VnB(), 3, 270), + "movprfx z31, z1\n" + "cdot z31.s, z0.b, z0.b[3], #270\n" + "mov z0.d, z31.d"); + CLEANUP(); } @@ -8178,10 +8219,6 @@ TEST(sve2_all_instructions) { // TODO: split these instructions into more logical groups. SETUP(); - // COMPARE_PREFIX(cdot(z18.VnS(), z26.VnB()), "cdot z18.d, z26.h, - // <Zm>.h[<imm>], <const>"); - // COMPARE_PREFIX(cdot(z5.VnD(), z7.VnH()), "cdot z5.d, z7.h, <Zm>.h[<imm>], - // <const>"); // COMPARE_PREFIX(cmla(z17.VnS(), z29.VnS()), "cmla z17.h, z29.h, // <Zm>.h[<imm>], <const>"); // COMPARE_PREFIX(cmla(z18.VnH(), z22.VnH()), "cmla z18.h, z22.h, diff --git a/test/aarch64/test-simulator-sve2-aarch64.cc b/test/aarch64/test-simulator-sve2-aarch64.cc index 86fd7114..26e29f30 100644 --- a/test/aarch64/test-simulator-sve2-aarch64.cc +++ b/test/aarch64/test-simulator-sve2-aarch64.cc @@ -5986,5 +5986,149 @@ TEST_SVE(sve2_while) { } } +TEST_SVE(sve2_cdot_index) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kSVE2, + CPUFeatures::kNEON, + CPUFeatures::kCRC32); + START(); + + SetInitialMachineState(&masm); + // state = 0xe2bd2480 + + { + ExactAssemblyScope scope(&masm, 50 * kInstructionSize); + __ dci(0x44bb4ef6); // cdot z22.s, z23.b, z3.b[3], #270 + // vl128 state = 0x452d1d6e + __ dci(0x44b94ff7); // cdot z23.s, z31.b, z1.b[3], #270 + // vl128 state = 0x546c9569 + __ dci(0x44b94dd5); // cdot z21.s, z14.b, z1.b[3], #270 + // vl128 state = 0xa2abf834 + __ dci(0x44bd45d7); // cdot z23.s, z14.b, z5.b[3], #90 + // vl128 state = 0xba77ed64 + __ dci(0x44fc45df); // cdot z31.d, z14.h, z12.h[1], #90 + // vl128 state = 0xe78163f2 + __ dci(0x44f441db); // cdot z27.d, z14.h, z4.h[1], #0 + // vl128 state = 0xca3b116d + __ dci(0x44f44dd3); // cdot z19.d, z14.h, z4.h[1], #270 + // vl128 state = 0x57ba3771 + __ dci(0x44b44d83); // cdot z3.s, z12.b, z4.b[2], #270 + // vl128 state = 0x4edccb88 + __ dci(0x44ac4d82); // cdot z2.s, z12.b, z4.b[1], #270 + // 
vl128 state = 0xc9543499 + __ dci(0x44a84f8a); // cdot z10.s, z28.b, z0.b[1], #270 + // vl128 state = 0x9d8fe439 + __ dci(0x44a84d08); // cdot z8.s, z8.b, z0.b[1], #270 + // vl128 state = 0x3c1bf0cc + __ dci(0x44ba4d09); // cdot z9.s, z8.b, z2.b[3], #270 + // vl128 state = 0x983716f1 + __ dci(0x44ea4d0d); // cdot z13.d, z8.h, z10.h[0], #270 + // vl128 state = 0x2df96300 + __ dci(0x44eb491d); // cdot z29.d, z8.h, z11.h[0], #180 + // vl128 state = 0xc23edde3 + __ dci(0x44e9499f); // cdot z31.d, z12.h, z9.h[0], #180 + // vl128 state = 0xef0ace9d + __ dci(0x44e84b9d); // cdot z29.d, z28.h, z8.h[0], #180 + // vl128 state = 0x2cce8002 + __ dci(0x44e84b99); // cdot z25.d, z28.h, z8.h[0], #180 + // vl128 state = 0xd07f46a1 + __ dci(0x44f84a9d); // cdot z29.d, z20.h, z8.h[1], #180 + // vl128 state = 0x239831e8 + __ dci(0x44f84a99); // cdot z25.d, z20.h, z8.h[1], #180 + // vl128 state = 0xa110988d + __ dci(0x44e84a09); // cdot z9.d, z16.h, z8.h[0], #180 + // vl128 state = 0x2b9ef292 + __ dci(0x44e84a19); // cdot z25.d, z16.h, z8.h[0], #180 + // vl128 state = 0x50eeb818 + __ dci(0x44e04b1b); // cdot z27.d, z24.h, z0.h[0], #180 + // vl128 state = 0xc33ce03b + __ dci(0x44e04a2b); // cdot z11.d, z17.h, z0.h[0], #180 + // vl128 state = 0xe163b5c9 + __ dci(0x44e04b0f); // cdot z15.d, z24.h, z0.h[0], #180 + // vl128 state = 0x052a34eb + __ dci(0x44e04b1f); // cdot z31.d, z24.h, z0.h[0], #180 + // vl128 state = 0x0660afb4 + __ dci(0x44e84b4f); // cdot z15.d, z26.h, z8.h[0], #180 + // vl128 state = 0x0ae01233 + __ dci(0x44ee4b4e); // cdot z14.d, z26.h, z14.h[0], #180 + // vl128 state = 0xde7bdd15 + __ dci(0x44ae4b7e); // cdot z30.s, z27.b, z6.b[1], #180 + // vl128 state = 0x758973a1 + __ dci(0x44a6497f); // cdot z31.s, z11.b, z6.b[0], #180 + // vl128 state = 0xb3c5df37 + __ dci(0x44a64df7); // cdot z23.s, z15.b, z6.b[0], #270 + // vl128 state = 0xe652f054 + __ dci(0x44a64c73); // cdot z19.s, z3.b, z6.b[0], #270 + // vl128 state = 0xc4b58041 + __ dci(0x44a64de3); // cdot z3.s, z15.b, 
z6.b[0], #270 + // vl128 state = 0x1239ca90 + __ dci(0x44a749e2); // cdot z2.s, z15.b, z7.b[0], #180 + // vl128 state = 0x4a01cdcb + __ dci(0x44a740e0); // cdot z0.s, z7.b, z7.b[0], #0 + // vl128 state = 0x604e45cf + __ dci(0x44a344e2); // cdot z2.s, z7.b, z3.b[0], #90 + // vl128 state = 0x12fe2972 + __ dci(0x44a34ca3); // cdot z3.s, z5.b, z3.b[0], #270 + // vl128 state = 0x78e0bb2e + __ dci(0x44e14cb3); // cdot z19.d, z5.h, z1.h[0], #270 + // vl128 state = 0xe3a69b46 + __ dci(0x44e14d31); // cdot z17.d, z9.h, z1.h[0], #270 + // vl128 state = 0xe6b58aa4 + __ dci(0x44f14d01); // cdot z1.d, z8.h, z1.h[1], #270 + // vl128 state = 0xffcfb597 + __ dci(0x44f14551); // cdot z17.d, z10.h, z1.h[1], #90 + // vl128 state = 0x2745934b + __ dci(0x44f345d5); // cdot z21.d, z14.h, z3.h[1], #90 + // vl128 state = 0xa38b5571 + __ dci(0x44f34574); // cdot z20.d, z11.h, z3.h[1], #90 + // vl128 state = 0x978afd92 + __ dci(0x44f34576); // cdot z22.d, z11.h, z3.h[1], #90 + // vl128 state = 0x9f1b19c9 + __ dci(0x44f34f77); // cdot z23.d, z27.h, z3.h[1], #270 + // vl128 state = 0x61a31d64 + __ dci(0x44f24f5f); // cdot z31.d, z26.h, z2.h[1], #270 + // vl128 state = 0x1e71023e + __ dci(0x44fa4fcf); // cdot z15.d, z30.h, z10.h[1], #270 + // vl128 state = 0xdbe5ffb3 + __ dci(0x44ba4f4e); // cdot z14.s, z26.b, z2.b[3], #270 + // vl128 state = 0x51390e81 + __ dci(0x44ba470c); // cdot z12.s, z24.b, z2.b[3], #90 + // vl128 state = 0x59ad5198 + __ dci(0x44b2479c); // cdot z28.s, z28.b, z2.b[2], #90 + // vl128 state = 0xe997de49 + __ dci(0x44b24fbd); // cdot z29.s, z29.b, z2.b[2], #270 + // vl128 state = 0x5533cefa + } + + uint32_t state; + ComputeMachineStateHash(&masm, &state); + __ Mov(x0, reinterpret_cast<uint64_t>(&state)); + __ Ldr(w0, MemOperand(x0)); + + END(); + if (CAN_RUN()) { + RUN(); + uint32_t expected_hashes[] = { + 0x5533cefa, + 0x1462a298, + 0x1acb4ead, + 0xeb05ddf0, + 0x23fe8c86, + 0xbb1e9f8c, + 0x4a933f43, + 0x4cd64b55, + 0x84a4b8b7, + 0x52019619, + 0x4442432b, + 0x9b353ce8, + 
0x333c9eef, + 0x291eac87, + 0x110f7371, + 0x009b25cb, + }; + ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); + } +} + } // namespace aarch64 } // namespace vixl |