diff options
author | Martyn Capewell <martyn.capewell@arm.com> | 2022-08-11 17:32:35 +0100 |
---|---|---|
committer | Martyn Capewell <martyn.capewell@arm.com> | 2022-08-11 17:32:35 +0100 |
commit | 7df62a379cefe555021abf0adb5103aa16a9d3b4 (patch) | |
tree | 896cbabe912c8b93fd408a34e2b41d975eaa2e4d | |
parent | 024d1cb3194c7d0f716f1191247659c1e7e4056d (diff) | |
parent | bcb9ee3ef8126ae6cb46bf0076615a545c498a8f (diff) |
Merge branch 'main' into mtemte
-rw-r--r-- | src/aarch64/cpu-aarch64.cc | 79 | ||||
-rw-r--r-- | src/aarch64/cpu-aarch64.h | 35 | ||||
-rw-r--r-- | src/cpu-features.h | 16 |
3 files changed, 105 insertions, 25 deletions
diff --git a/src/aarch64/cpu-aarch64.cc b/src/aarch64/cpu-aarch64.cc index bb3f4de5..1700ec57 100644 --- a/src/aarch64/cpu-aarch64.cc +++ b/src/aarch64/cpu-aarch64.cc @@ -48,6 +48,7 @@ const IDRegister::Field AA64PFR0::kCSV3(60); const IDRegister::Field AA64PFR1::kBT(0); const IDRegister::Field AA64PFR1::kSSBS(4); const IDRegister::Field AA64PFR1::kMTE(8); +const IDRegister::Field AA64PFR1::kSME(24); const IDRegister::Field AA64ISAR0::kAES(4); const IDRegister::Field AA64ISAR0::kSHA1(8); @@ -78,6 +79,7 @@ const IDRegister::Field AA64ISAR1::kBF16(44); const IDRegister::Field AA64ISAR1::kDGH(48); const IDRegister::Field AA64ISAR1::kI8MM(52); +const IDRegister::Field AA64ISAR2::kWFXT(0); const IDRegister::Field AA64ISAR2::kRPRES(4); const IDRegister::Field AA64MMFR0::kECV(60); @@ -97,6 +99,14 @@ const IDRegister::Field AA64ZFR0::kI8MM(44); const IDRegister::Field AA64ZFR0::kF32MM(52); const IDRegister::Field AA64ZFR0::kF64MM(56); +const IDRegister::Field AA64SMFR0::kSMEf32f32(32, 1); +const IDRegister::Field AA64SMFR0::kSMEb16f32(34, 1); +const IDRegister::Field AA64SMFR0::kSMEf16f32(35, 1); +const IDRegister::Field AA64SMFR0::kSMEi8i32(36); +const IDRegister::Field AA64SMFR0::kSMEf64f64(48, 1); +const IDRegister::Field AA64SMFR0::kSMEi16i64(52); +const IDRegister::Field AA64SMFR0::kSMEfa64(63, 1); + CPUFeatures AA64PFR0::GetCPUFeatures() const { CPUFeatures f; if (Get(kFP) >= 0) f.Combine(CPUFeatures::kFP); @@ -119,6 +129,8 @@ CPUFeatures AA64PFR1::GetCPUFeatures() const { if (Get(kSSBS) >= 2) f.Combine(CPUFeatures::kSSBSControl); if (Get(kMTE) >= 1) f.Combine(CPUFeatures::kMTEInstructions); if (Get(kMTE) >= 2) f.Combine(CPUFeatures::kMTE); + if (Get(kMTE) >= 3) f.Combine(CPUFeatures::kMTE3); + if (Get(kSME) >= 1) f.Combine(CPUFeatures::kSME); return f; } @@ -155,6 +167,7 @@ CPUFeatures AA64ISAR1::GetCPUFeatures() const { if (Get(kSB) >= 1) f.Combine(CPUFeatures::kSB); if (Get(kSPECRES) >= 1) f.Combine(CPUFeatures::kSPECRES); if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kBF16); + if (Get(kBF16) >= 2) f.Combine(CPUFeatures::kEBF16); if (Get(kDGH) >= 1) f.Combine(CPUFeatures::kDGH); if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kI8MM); @@ -180,6 +193,7 @@ CPUFeatures AA64ISAR1::GetCPUFeatures() const { CPUFeatures AA64ISAR2::GetCPUFeatures() const { CPUFeatures f; + if (Get(kWFXT) >= 2) f.Combine(CPUFeatures::kWFXT); if (Get(kRPRES) >= 1) f.Combine(CPUFeatures::kRPRES); return f; } @@ -220,6 +234,18 @@ CPUFeatures AA64ZFR0::GetCPUFeatures() const { return f; } +CPUFeatures AA64SMFR0::GetCPUFeatures() const { + CPUFeatures f; + if (Get(kSMEf32f32) >= 1) f.Combine(CPUFeatures::kSMEf32f32); + if (Get(kSMEb16f32) >= 1) f.Combine(CPUFeatures::kSMEb16f32); + if (Get(kSMEf16f32) >= 1) f.Combine(CPUFeatures::kSMEf16f32); + if (Get(kSMEi8i32) >= 15) f.Combine(CPUFeatures::kSMEi8i32); + if (Get(kSMEf64f64) >= 1) f.Combine(CPUFeatures::kSMEf64f64); + if (Get(kSMEi16i64) >= 15) f.Combine(CPUFeatures::kSMEi16i64); + if (Get(kSMEfa64) >= 1) f.Combine(CPUFeatures::kSMEfa64); + return f; +} + int IDRegister::Get(IDRegister::Field field) const { int msb = field.GetMsb(); int lsb = field.GetLsb(); @@ -252,7 +278,7 @@ CPUFeatures CPU::InferCPUFeaturesFromOS( // Map each set bit onto a feature. Ideally, we'd use HWCAP_* macros rather // than explicit bits, but explicit bits allow us to identify features that // the toolchain doesn't know about. - static const CPUFeatures::Feature kFeatureBits[] = + static const CPUFeatures::Feature kFeatureBitsLow[] = {// Bits 0-7 CPUFeatures::kFP, CPUFeatures::kNEON, @@ -288,8 +314,11 @@ CPUFeatures CPU::InferCPUFeaturesFromOS( CPUFeatures::kSSBSControl, CPUFeatures::kSB, CPUFeatures::kPAuth, - CPUFeatures::kPAuthGeneric, - // Bits 32-39 + CPUFeatures::kPAuthGeneric}; + VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsLow) < 64); + + static const CPUFeatures::Feature kFeatureBitsHigh[] = + {// Bits 0-7 CPUFeatures::kDCCVADP, CPUFeatures::kSVE2, CPUFeatures::kSVEAES, @@ -298,7 +327,7 @@ CPUFeatures CPU::InferCPUFeaturesFromOS( CPUFeatures::kSVESHA3, CPUFeatures::kSVESM4, CPUFeatures::kAXFlag, - // Bits 40-47 + // Bits 8-15 CPUFeatures::kFrintToFixedSizedInt, CPUFeatures::kSVEI8MM, CPUFeatures::kSVEF32MM, @@ -307,24 +336,42 @@ CPUFeatures CPU::InferCPUFeaturesFromOS( CPUFeatures::kI8MM, CPUFeatures::kBF16, CPUFeatures::kDGH, - // Bits 48+ + // Bits 16-23 CPUFeatures::kRNG, CPUFeatures::kBTI, CPUFeatures::kMTE, CPUFeatures::kECV, CPUFeatures::kAFP, - CPUFeatures::kRPRES}; + CPUFeatures::kRPRES, + CPUFeatures::kMTE3, + CPUFeatures::kSME, + // Bits 24-31 + CPUFeatures::kSMEi16i64, + CPUFeatures::kSMEf64f64, + CPUFeatures::kSMEi8i32, + CPUFeatures::kSMEf16f32, + CPUFeatures::kSMEb16f32, + CPUFeatures::kSMEf32f32, + CPUFeatures::kSMEfa64, + CPUFeatures::kWFXT, + // Bits 32-39 + CPUFeatures::kEBF16}; + VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsHigh) < 64); - uint64_t hwcap_low32 = getauxval(AT_HWCAP); - uint64_t hwcap_high32 = getauxval(AT_HWCAP2); - VIXL_ASSERT(IsUint32(hwcap_low32)); - VIXL_ASSERT(IsUint32(hwcap_high32)); - uint64_t hwcap = hwcap_low32 | (hwcap_high32 << 32); + auto combine_features = [&features](uint64_t hwcap, + const CPUFeatures::Feature* feature_array, + size_t features_size) { + for (size_t i = 0; i < features_size; i++) { + if (hwcap & (UINT64_C(1) << i)) features.Combine(feature_array[i]); + } + }; + + uint64_t hwcap_low = getauxval(AT_HWCAP); + uint64_t hwcap_high = getauxval(AT_HWCAP2); + + combine_features(hwcap_low, kFeatureBitsLow, ArrayLength(kFeatureBitsLow)); + combine_features(hwcap_high, kFeatureBitsHigh, ArrayLength(kFeatureBitsHigh)); - VIXL_STATIC_ASSERT(ArrayLength(kFeatureBits) < 64); - for (size_t i = 0; i < ArrayLength(kFeatureBits); i++) { - if (hwcap & (UINT64_C(1) << i)) features.Combine(kFeatureBits[i]); - } // MTE support from HWCAP2 signifies FEAT_MTE1 and FEAT_MTE2 support if (features.Has(CPUFeatures::kMTE)) { features.Combine(CPUFeatures::kMTEInstructions); @@ -425,7 +472,7 @@ int CPU::ReadSVEVectorLengthInBits() { } -void CPU::EnsureIAndDCacheCoherency(void *address, size_t length) { +void CPU::EnsureIAndDCacheCoherency(void* address, size_t length) { #ifdef __aarch64__ // Implement the cache synchronisation for all targets where AArch64 is the // host, even if we're building the simulator for an AAarch64 host. This diff --git a/src/aarch64/cpu-aarch64.h b/src/aarch64/cpu-aarch64.h index ca033a85..7ec3a25b 100644 --- a/src/aarch64/cpu-aarch64.h +++ b/src/aarch64/cpu-aarch64.h @@ -57,24 +57,24 @@ class IDRegister { public: enum Type { kUnsigned, kSigned }; + static const int kMaxWidthInBits = 4; + // This needs to be constexpr so that fields have "constant initialisation". // This avoids initialisation order problems when these values are used to // (dynamically) initialise static variables, etc. - explicit constexpr Field(int lsb, Type type = kUnsigned) - : lsb_(lsb), type_(type) {} - - static const int kMaxWidthInBits = 4; + explicit constexpr Field(int lsb, + int bitWidth = kMaxWidthInBits, + Type type = kUnsigned) + : lsb_(lsb), bitWidth_(bitWidth), type_(type) {} - int GetWidthInBits() const { - // All current ID fields have four bits. - return kMaxWidthInBits; - } + int GetWidthInBits() const { return bitWidth_; } int GetLsb() const { return lsb_; } int GetMsb() const { return lsb_ + GetWidthInBits() - 1; } Type GetType() const { return type_; } private: int lsb_; + int bitWidth_; Type type_; }; @@ -114,6 +114,7 @@ class AA64PFR1 : public IDRegister { static const Field kBT; static const Field kSSBS; static const Field kMTE; + static const Field kSME; }; class AA64ISAR0 : public IDRegister { @@ -168,6 +169,7 @@ class AA64ISAR2 : public IDRegister { CPUFeatures GetCPUFeatures() const; private: + static const Field kWFXT; static const Field kRPRES; }; @@ -220,6 +222,22 @@ class AA64ZFR0 : public IDRegister { static const Field kF64MM; }; +class AA64SMFR0 : public IDRegister { + public: + explicit AA64SMFR0(uint64_t value) : IDRegister(value) {} + + CPUFeatures GetCPUFeatures() const; + + private: + static const Field kSMEf32f32; + static const Field kSMEb16f32; + static const Field kSMEf16f32; + static const Field kSMEi8i32; + static const Field kSMEf64f64; + static const Field kSMEi16i64; + static const Field kSMEfa64; +}; + class CPU { public: // Initialise CPU support. @@ -286,6 +304,7 @@ class CPU { V(AA64MMFR1, "ID_AA64MMFR1_EL1") \ /* These registers are RES0 in the baseline Arm8.0. We can always safely */ \ /* read them, but some compilers don't accept the symbolic names. */ \ + V(AA64SMFR0, "S3_0_C0_C4_5") \ V(AA64ISAR2, "S3_0_C0_C6_2") \ V(AA64MMFR2, "S3_0_C0_C7_2") \ V(AA64ZFR0, "S3_0_C0_C4_4") diff --git a/src/cpu-features.h b/src/cpu-features.h index 24781dae..de9e1570 100644 --- a/src/cpu-features.h +++ b/src/cpu-features.h @@ -166,6 +166,7 @@ namespace vixl { /* Memory Tagging Extension. */ \ V(kMTEInstructions, "MTE (EL0 instructions)", NULL) \ V(kMTE, "MTE", NULL) \ + V(kMTE3, "MTE (asymmetric)", "mte3") \ /* PAuth extensions. */ \ V(kPAuthEnhancedPAC, "PAuth EnhancedPAC", NULL) \ V(kPAuthEnhancedPAC2, "PAuth EnhancedPAC2", NULL) \ @@ -185,7 +186,20 @@ namespace vixl { /* Increased precision of Reciprocal Estimate and Square Root Estimate */ \ V(kRPRES, "RPRES", "rpres") \ /* Memory operation instructions, for memcpy, memset */ \ - V(kMOPS, "Memory ops", NULL) + V(kMOPS, "Memory ops", NULL) \ + /* Scalable Matrix Extension (SME) */ \ + V(kSME, "SME", "sme") \ + V(kSMEi16i64, "SME (i16i64)", "smei16i64") \ + V(kSMEf64f64, "SME (f64f64)", "smef64f64") \ + V(kSMEi8i32, "SME (i8i32)", "smei8i32") \ + V(kSMEf16f32, "SME (f16f32)", "smef16f32") \ + V(kSMEb16f32, "SME (b16f32)", "smeb16f32") \ + V(kSMEf32f32, "SME (f32f32)", "smef32f32") \ + V(kSMEfa64, "SME (fa64)", "smefa64") \ + /* WFET and WFIT instruction support */ \ + V(kWFXT, "WFXT", "wfxt") \ + /* Extended BFloat16 instructions */ \ + V(kEBF16, "EBF16", "ebf16") // clang-format on |