aboutsummaryrefslogtreecommitdiff
path: root/tools/llvm-mca
diff options
context:
space:
mode:
authorAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2018-06-20 10:08:11 +0000
committerAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2018-06-20 10:08:11 +0000
commit9fc96b89c9e844163a0c5cca22b61ee3b4c8a6e4 (patch)
tree755107601ec362303cf4e0bd2c745bba889e87de /tools/llvm-mca
parent315f4d08af23cee041900090ad838a1500449f5a (diff)
[llvm-mca][X86] Teach how to identify register writes that implicitly clear the upper portion of a super-register.
This patch teaches llvm-mca how to identify register writes that implicitly zero the upper portion of a super-register. On X86-64, a general purpose register is implemented in hardware as a 64-bit register. Quoting the Intel 64 Software Developer's Manual: "an update to the lower 32 bits of a 64 bit integer register is architecturally defined to zero extend the upper 32 bits". Also, a write to an XMM register performed by an AVX instruction implicitly zeroes the upper 128 bits of the aliasing YMM register. This patch adds a new method named clearsSuperRegisters to the MCInstrAnalysis interface to help identify instructions that implicitly clear the upper portion of a super-register. The rest of the patch teaches llvm-mca how to use that new method to obtain the information, and update the register dependencies accordingly. I compared the kernels from tests clear-super-register-1.s and clear-super-register-2.s against the output from perf on btver2. Previously there was a large discrepancy between the estimated IPC and the measured IPC. Now the differences are mostly in the noise. Differential Revision: https://reviews.llvm.org/D48225 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@335113 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'tools/llvm-mca')
-rw-r--r--tools/llvm-mca/InstrBuilder.cpp42
-rw-r--r--tools/llvm-mca/InstrBuilder.h10
-rw-r--r--tools/llvm-mca/Instruction.h17
-rw-r--r--tools/llvm-mca/RegisterFile.cpp4
-rw-r--r--tools/llvm-mca/llvm-mca.cpp5
5 files changed, 45 insertions, 33 deletions
diff --git a/tools/llvm-mca/InstrBuilder.cpp b/tools/llvm-mca/InstrBuilder.cpp
index 8a66a76605f..dbdf0ed8098 100644
--- a/tools/llvm-mca/InstrBuilder.cpp
+++ b/tools/llvm-mca/InstrBuilder.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "InstrBuilder.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
@@ -158,23 +159,6 @@ static void populateWrites(InstrDesc &ID, const MCInst &MCI,
const MCInstrDesc &MCDesc,
const MCSchedClassDesc &SCDesc,
const MCSubtargetInfo &STI) {
- // Set if writes through this opcode may update super registers.
- // TODO: on x86-64, a 4 byte write of a general purpose register always
- // fully updates the super-register.
- // More in general, (at least on x86) not all register writes perform
- // a partial (super-)register update.
- // For example, an AVX instruction that writes on a XMM register implicitly
- // zeroes the upper half of every aliasing super-register.
- //
- // For now, we pessimistically assume that writes are all potentially
- // partial register updates. This is a good default for most targets, execept
- // for those like x86 which implement a special semantic for certain opcodes.
- // At least on x86, this may lead to an inaccurate prediction of the
- // instruction level parallelism.
- bool FullyUpdatesSuperRegisters = false;
-
- // Now Populate Writes.
-
// This algorithm currently works under the strong (and potentially incorrect)
// assumption that information related to register def/uses can be obtained
// from MCInstrDesc.
@@ -275,7 +259,6 @@ static void populateWrites(InstrDesc &ID, const MCInst &MCI,
Write.Latency = ID.MaxLatency;
Write.SClassOrWriteResourceID = 0;
}
- Write.FullyUpdatesSuperRegs = FullyUpdatesSuperRegisters;
Write.IsOptionalDef = false;
LLVM_DEBUG({
dbgs() << "\t\tOpIdx=" << Write.OpIndex << ", Latency=" << Write.Latency
@@ -488,16 +471,35 @@ InstrBuilder::createInstruction(const MCInst &MCI) {
NewIS->getUses().emplace_back(llvm::make_unique<ReadState>(RD, RegID));
}
+ // Early exit if there are no writes.
+ if (D.Writes.empty())
+ return NewIS;
+
+ // Track register writes that implicitly clear the upper portion of the
+ // underlying super-registers using an APInt.
+ APInt WriteMask(D.Writes.size(), 0);
+
+ // Now query the MCInstrAnalysis object to obtain information about which
+ // register writes implicitly clear the upper portion of a super-register.
+ MCIA.clearsSuperRegisters(MRI, MCI, WriteMask);
+
// Initialize writes.
+ unsigned WriteIndex = 0;
for (const WriteDescriptor &WD : D.Writes) {
unsigned RegID =
WD.OpIndex == -1 ? WD.RegisterID : MCI.getOperand(WD.OpIndex).getReg();
// Check if this is an optional definition that references NoReg.
- if (WD.IsOptionalDef && !RegID)
+ if (WD.IsOptionalDef && !RegID) {
+ ++WriteIndex;
continue;
+ }
assert(RegID && "Expected a valid register ID!");
- NewIS->getDefs().emplace_back(llvm::make_unique<WriteState>(WD, RegID));
+ APInt CurrWriteMask = WriteMask & (1 << WriteIndex);
+ bool UpdatesSuperRegisters = CurrWriteMask.getBoolValue();
+ NewIS->getDefs().emplace_back(
+ llvm::make_unique<WriteState>(WD, RegID, UpdatesSuperRegisters));
+ ++WriteIndex;
}
return NewIS;
diff --git a/tools/llvm-mca/InstrBuilder.h b/tools/llvm-mca/InstrBuilder.h
index 146e917eb62..1c325d911e3 100644
--- a/tools/llvm-mca/InstrBuilder.h
+++ b/tools/llvm-mca/InstrBuilder.h
@@ -17,7 +17,9 @@
#include "Instruction.h"
#include "Support.h"
+#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
namespace mca {
@@ -37,6 +39,8 @@ class DispatchUnit;
class InstrBuilder {
const llvm::MCSubtargetInfo &STI;
const llvm::MCInstrInfo &MCII;
+ const llvm::MCRegisterInfo &MRI;
+ const llvm::MCInstrAnalysis &MCIA;
llvm::SmallVector<uint64_t, 8> ProcResourceMasks;
llvm::DenseMap<unsigned short, std::unique_ptr<const InstrDesc>> Descriptors;
@@ -48,8 +52,10 @@ class InstrBuilder {
InstrBuilder &operator=(const InstrBuilder &) = delete;
public:
- InstrBuilder(const llvm::MCSubtargetInfo &sti, const llvm::MCInstrInfo &mcii)
- : STI(sti), MCII(mcii),
+ InstrBuilder(const llvm::MCSubtargetInfo &sti, const llvm::MCInstrInfo &mcii,
+ const llvm::MCRegisterInfo &mri,
+ const llvm::MCInstrAnalysis &mcia)
+ : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia),
ProcResourceMasks(STI.getSchedModel().getNumProcResourceKinds()) {
computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
}
diff --git a/tools/llvm-mca/Instruction.h b/tools/llvm-mca/Instruction.h
index e0a4504ec0e..dc21e889df8 100644
--- a/tools/llvm-mca/Instruction.h
+++ b/tools/llvm-mca/Instruction.h
@@ -70,11 +70,6 @@ struct WriteDescriptor {
// This field is set to a value different than zero only if this
// is an implicit definition.
unsigned RegisterID;
- // True if this write generates a partial update of a super-registers.
- // On X86, this flag is set by byte/word writes on GPR registers. Also,
- // a write of an XMM register only partially updates the corresponding
- // YMM super-register if the write is associated to a legacy SSE instruction.
- bool FullyUpdatesSuperRegs;
// Instruction itineraries would set this field to the SchedClass ID.
// Otherwise, it defaults to the WriteResourceID from the MCWriteLatencyEntry
// element associated to this write.
@@ -129,6 +124,10 @@ class WriteState {
// field RegisterID from WD.
unsigned RegisterID;
+ // True if this write implicitly clears the upper portion of RegisterID's
+ // super-registers.
+ bool ClearsSuperRegs;
+
// A list of dependent reads. Users is a set of dependent
// reads. A dependent read is added to the set only if CyclesLeft
// is "unknown". As soon as CyclesLeft is 'known', each user in the set
@@ -138,8 +137,10 @@ class WriteState {
std::set<std::pair<ReadState *, int>> Users;
public:
- WriteState(const WriteDescriptor &Desc, unsigned RegID)
- : WD(Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID) {}
+ WriteState(const WriteDescriptor &Desc, unsigned RegID,
+ bool clearsSuperRegs = false)
+ : WD(Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID),
+ ClearsSuperRegs(clearsSuperRegs) {}
WriteState(const WriteState &Other) = delete;
WriteState &operator=(const WriteState &Other) = delete;
@@ -148,7 +149,7 @@ public:
unsigned getRegisterID() const { return RegisterID; }
void addUser(ReadState *Use, int ReadAdvance);
- bool fullyUpdatesSuperRegs() const { return WD.FullyUpdatesSuperRegs; }
+ bool clearsSuperRegisters() const { return ClearsSuperRegs; }
// On every cycle, update CyclesLeft and notify dependent users.
void cycleEvent();
diff --git a/tools/llvm-mca/RegisterFile.cpp b/tools/llvm-mca/RegisterFile.cpp
index b12c7a47046..9679bb45485 100644
--- a/tools/llvm-mca/RegisterFile.cpp
+++ b/tools/llvm-mca/RegisterFile.cpp
@@ -138,7 +138,7 @@ void RegisterFile::addRegisterWrite(WriteState &WS,
allocatePhysRegs(Mapping.second, UsedPhysRegs);
// If this is a partial update, then we are done.
- if (!WS.fullyUpdatesSuperRegs())
+ if (!WS.clearsSuperRegisters())
return;
for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I)
@@ -149,7 +149,7 @@ void RegisterFile::removeRegisterWrite(const WriteState &WS,
MutableArrayRef<unsigned> FreedPhysRegs,
bool ShouldFreePhysRegs) {
unsigned RegID = WS.getRegisterID();
- bool ShouldInvalidateSuperRegs = WS.fullyUpdatesSuperRegs();
+ bool ShouldInvalidateSuperRegs = WS.clearsSuperRegisters();
assert(RegID != 0 && "Invalidating an already invalid register?");
assert(WS.getCyclesLeft() != -512 &&
diff --git a/tools/llvm-mca/llvm-mca.cpp b/tools/llvm-mca/llvm-mca.cpp
index 4d10704c10c..372be3e0d6d 100644
--- a/tools/llvm-mca/llvm-mca.cpp
+++ b/tools/llvm-mca/llvm-mca.cpp
@@ -388,6 +388,9 @@ int main(int argc, char **argv) {
std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
+ std::unique_ptr<MCInstrAnalysis> MCIA(
+ TheTarget->createMCInstrAnalysis(MCII.get()));
+
if (!MCPU.compare("native"))
MCPU = llvm::sys::getHostCPUName();
@@ -457,7 +460,7 @@ int main(int argc, char **argv) {
Width = DispatchWidth;
// Create an instruction builder.
- mca::InstrBuilder IB(*STI, *MCII);
+ mca::InstrBuilder IB(*STI, *MCII, *MRI, *MCIA);
// Number each region in the sequence.
unsigned RegionIdx = 0;