diff options
author | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2018-06-20 10:08:11 +0000 |
---|---|---|
committer | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2018-06-20 10:08:11 +0000 |
commit | 9fc96b89c9e844163a0c5cca22b61ee3b4c8a6e4 (patch) | |
tree | 755107601ec362303cf4e0bd2c745bba889e87de /tools/llvm-mca | |
parent | 315f4d08af23cee041900090ad838a1500449f5a (diff) |
[llvm-mca][X86] Teach how to identify register writes that implicitly clear the upper portion of a super-register.
This patch teaches llvm-mca how to identify register writes that implicitly zero
the upper portion of a super-register.
On X86-64, a general purpose register is implemented in hardware as a 64-bit
register. Quoting the Intel 64 Software Developer's Manual: "an update to the
lower 32 bits of a 64 bit integer register is architecturally defined to zero
extend the upper 32 bits". Also, a write to an XMM register performed by an AVX
instruction implicitly zeroes the upper 128 bits of the aliasing YMM register.
This patch adds a new method named clearsSuperRegisters to the MCInstrAnalysis
interface to help identify instructions that implicitly clear the upper portion
of a super-register. The rest of the patch teaches llvm-mca how to use that new
method to obtain the information and to update the register dependencies
accordingly.
I compared the kernels from tests clear-super-register-1.s and
clear-super-register-2.s against the output from perf on btver2. Previously
there was a large discrepancy between the estimated IPC and the measured IPC.
Now the differences are mostly in the noise.
Differential Revision: https://reviews.llvm.org/D48225
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@335113 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'tools/llvm-mca')
-rw-r--r-- | tools/llvm-mca/InstrBuilder.cpp | 42 | ||||
-rw-r--r-- | tools/llvm-mca/InstrBuilder.h | 10 | ||||
-rw-r--r-- | tools/llvm-mca/Instruction.h | 17 | ||||
-rw-r--r-- | tools/llvm-mca/RegisterFile.cpp | 4 | ||||
-rw-r--r-- | tools/llvm-mca/llvm-mca.cpp | 5 |
5 files changed, 45 insertions, 33 deletions
diff --git a/tools/llvm-mca/InstrBuilder.cpp b/tools/llvm-mca/InstrBuilder.cpp index 8a66a76605f..dbdf0ed8098 100644 --- a/tools/llvm-mca/InstrBuilder.cpp +++ b/tools/llvm-mca/InstrBuilder.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "InstrBuilder.h" +#include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/Debug.h" @@ -158,23 +159,6 @@ static void populateWrites(InstrDesc &ID, const MCInst &MCI, const MCInstrDesc &MCDesc, const MCSchedClassDesc &SCDesc, const MCSubtargetInfo &STI) { - // Set if writes through this opcode may update super registers. - // TODO: on x86-64, a 4 byte write of a general purpose register always - // fully updates the super-register. - // More in general, (at least on x86) not all register writes perform - // a partial (super-)register update. - // For example, an AVX instruction that writes on a XMM register implicitly - // zeroes the upper half of every aliasing super-register. - // - // For now, we pessimistically assume that writes are all potentially - // partial register updates. This is a good default for most targets, execept - // for those like x86 which implement a special semantic for certain opcodes. - // At least on x86, this may lead to an inaccurate prediction of the - // instruction level parallelism. - bool FullyUpdatesSuperRegisters = false; - - // Now Populate Writes. - // This algorithm currently works under the strong (and potentially incorrect) // assumption that information related to register def/uses can be obtained // from MCInstrDesc. 
@@ -275,7 +259,6 @@ static void populateWrites(InstrDesc &ID, const MCInst &MCI, Write.Latency = ID.MaxLatency; Write.SClassOrWriteResourceID = 0; } - Write.FullyUpdatesSuperRegs = FullyUpdatesSuperRegisters; Write.IsOptionalDef = false; LLVM_DEBUG({ dbgs() << "\t\tOpIdx=" << Write.OpIndex << ", Latency=" << Write.Latency @@ -488,16 +471,35 @@ InstrBuilder::createInstruction(const MCInst &MCI) { NewIS->getUses().emplace_back(llvm::make_unique<ReadState>(RD, RegID)); } + // Early exit if there are no writes. + if (D.Writes.empty()) + return NewIS; + + // Track register writes that implicitly clear the upper portion of the + // underlying super-registers using an APInt. + APInt WriteMask(D.Writes.size(), 0); + + // Now query the MCInstrAnalysis object to obtain information about which + // register writes implicitly clear the upper portion of a super-register. + MCIA.clearsSuperRegisters(MRI, MCI, WriteMask); + // Initialize writes. + unsigned WriteIndex = 0; for (const WriteDescriptor &WD : D.Writes) { unsigned RegID = WD.OpIndex == -1 ? WD.RegisterID : MCI.getOperand(WD.OpIndex).getReg(); // Check if this is a optional definition that references NoReg. 
- if (WD.IsOptionalDef && !RegID) + if (WD.IsOptionalDef && !RegID) { + ++WriteIndex; continue; + } assert(RegID && "Expected a valid register ID!"); - NewIS->getDefs().emplace_back(llvm::make_unique<WriteState>(WD, RegID)); + APInt CurrWriteMask = WriteMask & (1 << WriteIndex); + bool UpdatesSuperRegisters = CurrWriteMask.getBoolValue(); + NewIS->getDefs().emplace_back( + llvm::make_unique<WriteState>(WD, RegID, UpdatesSuperRegisters)); + ++WriteIndex; } return NewIS; diff --git a/tools/llvm-mca/InstrBuilder.h b/tools/llvm-mca/InstrBuilder.h index 146e917eb62..1c325d911e3 100644 --- a/tools/llvm-mca/InstrBuilder.h +++ b/tools/llvm-mca/InstrBuilder.h @@ -17,7 +17,9 @@ #include "Instruction.h" #include "Support.h" +#include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" namespace mca { @@ -37,6 +39,8 @@ class DispatchUnit; class InstrBuilder { const llvm::MCSubtargetInfo &STI; const llvm::MCInstrInfo &MCII; + const llvm::MCRegisterInfo &MRI; + const llvm::MCInstrAnalysis &MCIA; llvm::SmallVector<uint64_t, 8> ProcResourceMasks; llvm::DenseMap<unsigned short, std::unique_ptr<const InstrDesc>> Descriptors; @@ -48,8 +52,10 @@ class InstrBuilder { InstrBuilder &operator=(const InstrBuilder &) = delete; public: - InstrBuilder(const llvm::MCSubtargetInfo &sti, const llvm::MCInstrInfo &mcii) - : STI(sti), MCII(mcii), + InstrBuilder(const llvm::MCSubtargetInfo &sti, const llvm::MCInstrInfo &mcii, + const llvm::MCRegisterInfo &mri, + const llvm::MCInstrAnalysis &mcia) + : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), ProcResourceMasks(STI.getSchedModel().getNumProcResourceKinds()) { computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks); } diff --git a/tools/llvm-mca/Instruction.h b/tools/llvm-mca/Instruction.h index e0a4504ec0e..dc21e889df8 100644 --- a/tools/llvm-mca/Instruction.h +++ b/tools/llvm-mca/Instruction.h @@ -70,11 +70,6 @@ struct WriteDescriptor { // This field is set 
to a value different than zero only if this // is an implicit definition. unsigned RegisterID; - // True if this write generates a partial update of a super-registers. - // On X86, this flag is set by byte/word writes on GPR registers. Also, - // a write of an XMM register only partially updates the corresponding - // YMM super-register if the write is associated to a legacy SSE instruction. - bool FullyUpdatesSuperRegs; // Instruction itineraries would set this field to the SchedClass ID. // Otherwise, it defaults to the WriteResourceID from the MCWriteLatencyEntry // element associated to this write. @@ -129,6 +124,10 @@ class WriteState { // field RegisterID from WD. unsigned RegisterID; + // True if this write implicitly clears the upper portion of RegisterID's + // super-registers. + bool ClearsSuperRegs; + // A list of dependent reads. Users is a set of dependent // reads. A dependent read is added to the set only if CyclesLeft // is "unknown". As soon as CyclesLeft is 'known', each user in the set @@ -138,8 +137,10 @@ class WriteState { std::set<std::pair<ReadState *, int>> Users; public: - WriteState(const WriteDescriptor &Desc, unsigned RegID) - : WD(Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID) {} + WriteState(const WriteDescriptor &Desc, unsigned RegID, + bool clearsSuperRegs = false) + : WD(Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID), + ClearsSuperRegs(clearsSuperRegs) {} WriteState(const WriteState &Other) = delete; WriteState &operator=(const WriteState &Other) = delete; @@ -148,7 +149,7 @@ public: unsigned getRegisterID() const { return RegisterID; } void addUser(ReadState *Use, int ReadAdvance); - bool fullyUpdatesSuperRegs() const { return WD.FullyUpdatesSuperRegs; } + bool clearsSuperRegisters() const { return ClearsSuperRegs; } // On every cycle, update CyclesLeft and notify dependent users. 
void cycleEvent(); diff --git a/tools/llvm-mca/RegisterFile.cpp b/tools/llvm-mca/RegisterFile.cpp index b12c7a47046..9679bb45485 100644 --- a/tools/llvm-mca/RegisterFile.cpp +++ b/tools/llvm-mca/RegisterFile.cpp @@ -138,7 +138,7 @@ void RegisterFile::addRegisterWrite(WriteState &WS, allocatePhysRegs(Mapping.second, UsedPhysRegs); // If this is a partial update, then we are done. - if (!WS.fullyUpdatesSuperRegs()) + if (!WS.clearsSuperRegisters()) return; for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) @@ -149,7 +149,7 @@ void RegisterFile::removeRegisterWrite(const WriteState &WS, MutableArrayRef<unsigned> FreedPhysRegs, bool ShouldFreePhysRegs) { unsigned RegID = WS.getRegisterID(); - bool ShouldInvalidateSuperRegs = WS.fullyUpdatesSuperRegs(); + bool ShouldInvalidateSuperRegs = WS.clearsSuperRegisters(); assert(RegID != 0 && "Invalidating an already invalid register?"); assert(WS.getCyclesLeft() != -512 && diff --git a/tools/llvm-mca/llvm-mca.cpp b/tools/llvm-mca/llvm-mca.cpp index 4d10704c10c..372be3e0d6d 100644 --- a/tools/llvm-mca/llvm-mca.cpp +++ b/tools/llvm-mca/llvm-mca.cpp @@ -388,6 +388,9 @@ int main(int argc, char **argv) { std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo()); + std::unique_ptr<MCInstrAnalysis> MCIA( + TheTarget->createMCInstrAnalysis(MCII.get())); + if (!MCPU.compare("native")) MCPU = llvm::sys::getHostCPUName(); @@ -457,7 +460,7 @@ int main(int argc, char **argv) { Width = DispatchWidth; // Create an instruction builder. - mca::InstrBuilder IB(*STI, *MCII); + mca::InstrBuilder IB(*STI, *MCII, *MRI, *MCIA); // Number each region in the sequence. unsigned RegionIdx = 0; |