aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartyn Capewell <martyn.capewell@arm.com>2022-08-11 17:32:35 +0100
committerMartyn Capewell <martyn.capewell@arm.com>2022-08-11 17:32:35 +0100
commit7df62a379cefe555021abf0adb5103aa16a9d3b4 (patch)
tree896cbabe912c8b93fd408a34e2b41d975eaa2e4d
parent024d1cb3194c7d0f716f1191247659c1e7e4056d (diff)
parentbcb9ee3ef8126ae6cb46bf0076615a545c498a8f (diff)
Merge branch 'main' into mtemte
-rw-r--r--src/aarch64/cpu-aarch64.cc79
-rw-r--r--src/aarch64/cpu-aarch64.h35
-rw-r--r--src/cpu-features.h16
3 files changed, 105 insertions, 25 deletions
diff --git a/src/aarch64/cpu-aarch64.cc b/src/aarch64/cpu-aarch64.cc
index bb3f4de5..1700ec57 100644
--- a/src/aarch64/cpu-aarch64.cc
+++ b/src/aarch64/cpu-aarch64.cc
@@ -48,6 +48,7 @@ const IDRegister::Field AA64PFR0::kCSV3(60);
const IDRegister::Field AA64PFR1::kBT(0);
const IDRegister::Field AA64PFR1::kSSBS(4);
const IDRegister::Field AA64PFR1::kMTE(8);
+const IDRegister::Field AA64PFR1::kSME(24);
const IDRegister::Field AA64ISAR0::kAES(4);
const IDRegister::Field AA64ISAR0::kSHA1(8);
@@ -78,6 +79,7 @@ const IDRegister::Field AA64ISAR1::kBF16(44);
const IDRegister::Field AA64ISAR1::kDGH(48);
const IDRegister::Field AA64ISAR1::kI8MM(52);
+const IDRegister::Field AA64ISAR2::kWFXT(0);
const IDRegister::Field AA64ISAR2::kRPRES(4);
const IDRegister::Field AA64MMFR0::kECV(60);
@@ -97,6 +99,14 @@ const IDRegister::Field AA64ZFR0::kI8MM(44);
const IDRegister::Field AA64ZFR0::kF32MM(52);
const IDRegister::Field AA64ZFR0::kF64MM(56);
+const IDRegister::Field AA64SMFR0::kSMEf32f32(32, 1);
+const IDRegister::Field AA64SMFR0::kSMEb16f32(34, 1);
+const IDRegister::Field AA64SMFR0::kSMEf16f32(35, 1);
+const IDRegister::Field AA64SMFR0::kSMEi8i32(36);
+const IDRegister::Field AA64SMFR0::kSMEf64f64(48, 1);
+const IDRegister::Field AA64SMFR0::kSMEi16i64(52);
+const IDRegister::Field AA64SMFR0::kSMEfa64(63, 1);
+
CPUFeatures AA64PFR0::GetCPUFeatures() const {
CPUFeatures f;
if (Get(kFP) >= 0) f.Combine(CPUFeatures::kFP);
@@ -119,6 +129,8 @@ CPUFeatures AA64PFR1::GetCPUFeatures() const {
if (Get(kSSBS) >= 2) f.Combine(CPUFeatures::kSSBSControl);
if (Get(kMTE) >= 1) f.Combine(CPUFeatures::kMTEInstructions);
if (Get(kMTE) >= 2) f.Combine(CPUFeatures::kMTE);
+ if (Get(kMTE) >= 3) f.Combine(CPUFeatures::kMTE3);
+ if (Get(kSME) >= 1) f.Combine(CPUFeatures::kSME);
return f;
}
@@ -155,6 +167,7 @@ CPUFeatures AA64ISAR1::GetCPUFeatures() const {
if (Get(kSB) >= 1) f.Combine(CPUFeatures::kSB);
if (Get(kSPECRES) >= 1) f.Combine(CPUFeatures::kSPECRES);
if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kBF16);
+ if (Get(kBF16) >= 2) f.Combine(CPUFeatures::kEBF16);
if (Get(kDGH) >= 1) f.Combine(CPUFeatures::kDGH);
if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kI8MM);
@@ -180,6 +193,7 @@ CPUFeatures AA64ISAR1::GetCPUFeatures() const {
CPUFeatures AA64ISAR2::GetCPUFeatures() const {
CPUFeatures f;
+ if (Get(kWFXT) >= 2) f.Combine(CPUFeatures::kWFXT);
if (Get(kRPRES) >= 1) f.Combine(CPUFeatures::kRPRES);
return f;
}
@@ -220,6 +234,18 @@ CPUFeatures AA64ZFR0::GetCPUFeatures() const {
return f;
}
+CPUFeatures AA64SMFR0::GetCPUFeatures() const {
+ CPUFeatures f;
+ if (Get(kSMEf32f32) >= 1) f.Combine(CPUFeatures::kSMEf32f32);
+ if (Get(kSMEb16f32) >= 1) f.Combine(CPUFeatures::kSMEb16f32);
+ if (Get(kSMEf16f32) >= 1) f.Combine(CPUFeatures::kSMEf16f32);
+ if (Get(kSMEi8i32) >= 15) f.Combine(CPUFeatures::kSMEi8i32);
+ if (Get(kSMEf64f64) >= 1) f.Combine(CPUFeatures::kSMEf64f64);
+ if (Get(kSMEi16i64) >= 15) f.Combine(CPUFeatures::kSMEi16i64);
+ if (Get(kSMEfa64) >= 1) f.Combine(CPUFeatures::kSMEfa64);
+ return f;
+}
+
int IDRegister::Get(IDRegister::Field field) const {
int msb = field.GetMsb();
int lsb = field.GetLsb();
@@ -252,7 +278,7 @@ CPUFeatures CPU::InferCPUFeaturesFromOS(
// Map each set bit onto a feature. Ideally, we'd use HWCAP_* macros rather
// than explicit bits, but explicit bits allow us to identify features that
// the toolchain doesn't know about.
- static const CPUFeatures::Feature kFeatureBits[] =
+ static const CPUFeatures::Feature kFeatureBitsLow[] =
{// Bits 0-7
CPUFeatures::kFP,
CPUFeatures::kNEON,
@@ -288,8 +314,11 @@ CPUFeatures CPU::InferCPUFeaturesFromOS(
CPUFeatures::kSSBSControl,
CPUFeatures::kSB,
CPUFeatures::kPAuth,
- CPUFeatures::kPAuthGeneric,
- // Bits 32-39
+ CPUFeatures::kPAuthGeneric};
+ VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsLow) < 64);
+
+ static const CPUFeatures::Feature kFeatureBitsHigh[] =
+ {// Bits 0-7
CPUFeatures::kDCCVADP,
CPUFeatures::kSVE2,
CPUFeatures::kSVEAES,
@@ -298,7 +327,7 @@ CPUFeatures CPU::InferCPUFeaturesFromOS(
CPUFeatures::kSVESHA3,
CPUFeatures::kSVESM4,
CPUFeatures::kAXFlag,
- // Bits 40-47
+ // Bits 8-15
CPUFeatures::kFrintToFixedSizedInt,
CPUFeatures::kSVEI8MM,
CPUFeatures::kSVEF32MM,
@@ -307,24 +336,42 @@ CPUFeatures CPU::InferCPUFeaturesFromOS(
CPUFeatures::kI8MM,
CPUFeatures::kBF16,
CPUFeatures::kDGH,
- // Bits 48+
+ // Bits 16-23
CPUFeatures::kRNG,
CPUFeatures::kBTI,
CPUFeatures::kMTE,
CPUFeatures::kECV,
CPUFeatures::kAFP,
- CPUFeatures::kRPRES};
+ CPUFeatures::kRPRES,
+ CPUFeatures::kMTE3,
+ CPUFeatures::kSME,
+ // Bits 24-31
+ CPUFeatures::kSMEi16i64,
+ CPUFeatures::kSMEf64f64,
+ CPUFeatures::kSMEi8i32,
+ CPUFeatures::kSMEf16f32,
+ CPUFeatures::kSMEb16f32,
+ CPUFeatures::kSMEf32f32,
+ CPUFeatures::kSMEfa64,
+ CPUFeatures::kWFXT,
+ // Bits 32-39
+ CPUFeatures::kEBF16};
+ VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsHigh) < 64);
- uint64_t hwcap_low32 = getauxval(AT_HWCAP);
- uint64_t hwcap_high32 = getauxval(AT_HWCAP2);
- VIXL_ASSERT(IsUint32(hwcap_low32));
- VIXL_ASSERT(IsUint32(hwcap_high32));
- uint64_t hwcap = hwcap_low32 | (hwcap_high32 << 32);
+ auto combine_features = [&features](uint64_t hwcap,
+ const CPUFeatures::Feature* feature_array,
+ size_t features_size) {
+ for (size_t i = 0; i < features_size; i++) {
+ if (hwcap & (UINT64_C(1) << i)) features.Combine(feature_array[i]);
+ }
+ };
+
+ uint64_t hwcap_low = getauxval(AT_HWCAP);
+ uint64_t hwcap_high = getauxval(AT_HWCAP2);
+
+ combine_features(hwcap_low, kFeatureBitsLow, ArrayLength(kFeatureBitsLow));
+ combine_features(hwcap_high, kFeatureBitsHigh, ArrayLength(kFeatureBitsHigh));
- VIXL_STATIC_ASSERT(ArrayLength(kFeatureBits) < 64);
- for (size_t i = 0; i < ArrayLength(kFeatureBits); i++) {
- if (hwcap & (UINT64_C(1) << i)) features.Combine(kFeatureBits[i]);
- }
// MTE support from HWCAP2 signifies FEAT_MTE1 and FEAT_MTE2 support
if (features.Has(CPUFeatures::kMTE)) {
features.Combine(CPUFeatures::kMTEInstructions);
@@ -425,7 +472,7 @@ int CPU::ReadSVEVectorLengthInBits() {
}
-void CPU::EnsureIAndDCacheCoherency(void *address, size_t length) {
+void CPU::EnsureIAndDCacheCoherency(void* address, size_t length) {
#ifdef __aarch64__
// Implement the cache synchronisation for all targets where AArch64 is the
// host, even if we're building the simulator for an AArch64 host. This
diff --git a/src/aarch64/cpu-aarch64.h b/src/aarch64/cpu-aarch64.h
index ca033a85..7ec3a25b 100644
--- a/src/aarch64/cpu-aarch64.h
+++ b/src/aarch64/cpu-aarch64.h
@@ -57,24 +57,24 @@ class IDRegister {
public:
enum Type { kUnsigned, kSigned };
+ static const int kMaxWidthInBits = 4;
+
// This needs to be constexpr so that fields have "constant initialisation".
// This avoids initialisation order problems when these values are used to
// (dynamically) initialise static variables, etc.
- explicit constexpr Field(int lsb, Type type = kUnsigned)
- : lsb_(lsb), type_(type) {}
-
- static const int kMaxWidthInBits = 4;
+ explicit constexpr Field(int lsb,
+ int bitWidth = kMaxWidthInBits,
+ Type type = kUnsigned)
+ : lsb_(lsb), bitWidth_(bitWidth), type_(type) {}
- int GetWidthInBits() const {
- // All current ID fields have four bits.
- return kMaxWidthInBits;
- }
+ int GetWidthInBits() const { return bitWidth_; }
int GetLsb() const { return lsb_; }
int GetMsb() const { return lsb_ + GetWidthInBits() - 1; }
Type GetType() const { return type_; }
private:
int lsb_;
+ int bitWidth_;
Type type_;
};
@@ -114,6 +114,7 @@ class AA64PFR1 : public IDRegister {
static const Field kBT;
static const Field kSSBS;
static const Field kMTE;
+ static const Field kSME;
};
class AA64ISAR0 : public IDRegister {
@@ -168,6 +169,7 @@ class AA64ISAR2 : public IDRegister {
CPUFeatures GetCPUFeatures() const;
private:
+ static const Field kWFXT;
static const Field kRPRES;
};
@@ -220,6 +222,22 @@ class AA64ZFR0 : public IDRegister {
static const Field kF64MM;
};
+class AA64SMFR0 : public IDRegister {
+ public:
+ explicit AA64SMFR0(uint64_t value) : IDRegister(value) {}
+
+ CPUFeatures GetCPUFeatures() const;
+
+ private:
+ static const Field kSMEf32f32;
+ static const Field kSMEb16f32;
+ static const Field kSMEf16f32;
+ static const Field kSMEi8i32;
+ static const Field kSMEf64f64;
+ static const Field kSMEi16i64;
+ static const Field kSMEfa64;
+};
+
class CPU {
public:
// Initialise CPU support.
@@ -286,6 +304,7 @@ class CPU {
V(AA64MMFR1, "ID_AA64MMFR1_EL1") \
/* These registers are RES0 in the baseline Arm8.0. We can always safely */ \
/* read them, but some compilers don't accept the symbolic names. */ \
+ V(AA64SMFR0, "S3_0_C0_C4_5") \
V(AA64ISAR2, "S3_0_C0_C6_2") \
V(AA64MMFR2, "S3_0_C0_C7_2") \
V(AA64ZFR0, "S3_0_C0_C4_4")
diff --git a/src/cpu-features.h b/src/cpu-features.h
index 24781dae..de9e1570 100644
--- a/src/cpu-features.h
+++ b/src/cpu-features.h
@@ -166,6 +166,7 @@ namespace vixl {
/* Memory Tagging Extension. */ \
V(kMTEInstructions, "MTE (EL0 instructions)", NULL) \
V(kMTE, "MTE", NULL) \
+ V(kMTE3, "MTE (asymmetric)", "mte3") \
/* PAuth extensions. */ \
V(kPAuthEnhancedPAC, "PAuth EnhancedPAC", NULL) \
V(kPAuthEnhancedPAC2, "PAuth EnhancedPAC2", NULL) \
@@ -185,7 +186,20 @@ namespace vixl {
/* Increased precision of Reciprocal Estimate and Square Root Estimate */ \
V(kRPRES, "RPRES", "rpres") \
/* Memory operation instructions, for memcpy, memset */ \
- V(kMOPS, "Memory ops", NULL)
+ V(kMOPS, "Memory ops", NULL) \
+ /* Scalable Matrix Extension (SME) */ \
+ V(kSME, "SME", "sme") \
+ V(kSMEi16i64, "SME (i16i64)", "smei16i64") \
+ V(kSMEf64f64, "SME (f64f64)", "smef64f64") \
+ V(kSMEi8i32, "SME (i8i32)", "smei8i32") \
+ V(kSMEf16f32, "SME (f16f32)", "smef16f32") \
+ V(kSMEb16f32, "SME (b16f32)", "smeb16f32") \
+ V(kSMEf32f32, "SME (f32f32)", "smef32f32") \
+ V(kSMEfa64, "SME (fa64)", "smefa64") \
+ /* WFET and WFIT instruction support */ \
+ V(kWFXT, "WFXT", "wfxt") \
+ /* Extended BFloat16 instructions */ \
+ V(kEBF16, "EBF16", "ebf16")
// clang-format on