diff options
author | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2018-08-03 12:55:28 +0000 |
---|---|---|
committer | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2018-08-03 12:55:28 +0000 |
commit | d6b95e9be384f19b570d2d30e34a49f913afab6d (patch) | |
tree | bf1d2f07179636f948341eaa5ae28914e162188b /tools/llvm-mca | |
parent | 1a16c79be030215a94763e252c330c4fe6b4f4f1 (diff) |
[llvm-mca] Speed up the computation of the wait/ready/issued sets in the Scheduler.
This patch is a follow-up to r338702.
We don't need to use a map to model the wait/ready/issued sets. It is much more
efficient to use a vector instead.
This patch gives us an average 7.5% speedup (on top of the ~12% speedup obtained
after r338702).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338883 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'tools/llvm-mca')
-rw-r--r-- | tools/llvm-mca/ExecuteStage.cpp | 6 | ||||
-rw-r--r-- | tools/llvm-mca/Instruction.h | 5 | ||||
-rw-r--r-- | tools/llvm-mca/Scheduler.cpp | 139 | ||||
-rw-r--r-- | tools/llvm-mca/Scheduler.h | 64 |
4 files changed, 114 insertions, 100 deletions
diff --git a/tools/llvm-mca/ExecuteStage.cpp b/tools/llvm-mca/ExecuteStage.cpp index 437f864b072..1bc9c4a0dbd 100644 --- a/tools/llvm-mca/ExecuteStage.cpp +++ b/tools/llvm-mca/ExecuteStage.cpp @@ -37,7 +37,7 @@ void ExecuteStage::reclaimSchedulerResources() { // Update the scheduler's instruction queues. void ExecuteStage::updateSchedulerQueues() { SmallVector<InstRef, 4> InstructionIDs; - HWS.updateIssuedQueue(InstructionIDs); + HWS.updateIssuedSet(InstructionIDs); for (const InstRef &IR : InstructionIDs) notifyInstructionExecuted(IR); InstructionIDs.clear(); @@ -65,9 +65,9 @@ void ExecuteStage::issueReadyInstructions() { // Instructions that have been issued during this cycle might have unblocked // other dependent instructions. Dependent instructions may be issued during // this same cycle if operands have ReadAdvance entries. Promote those - // instructions to the ReadyQueue and tell to the caller that we need + // instructions to the ReadySet and tell to the caller that we need // another round of 'issue()'. - HWS.promoteToReadyQueue(InstructionIDs); + HWS.promoteToReadySet(InstructionIDs); for (const InstRef &I : InstructionIDs) notifyInstructionReady(I); InstructionIDs.clear(); diff --git a/tools/llvm-mca/Instruction.h b/tools/llvm-mca/Instruction.h index 3b2f90528f2..66153cdb988 100644 --- a/tools/llvm-mca/Instruction.h +++ b/tools/llvm-mca/Instruction.h @@ -384,9 +384,12 @@ public: Instruction *getInstruction() { return second; } const Instruction *getInstruction() const { return second; } - /// Returns true if this InstRef has been populated. + /// Returns true if this references a valid instruction. bool isValid() const { return second != nullptr; } + /// Invalidate this reference. + void invalidate() { second = nullptr; } + #ifndef NDEBUG void print(llvm::raw_ostream &OS) const { OS << getSourceIndex(); } #endif diff --git a/tools/llvm-mca/Scheduler.cpp b/tools/llvm-mca/Scheduler.cpp index 553cd00bb3a..764cd23c40d 100644 --- a/tools/llvm-mca/Scheduler.cpp +++ b/tools/llvm-mca/Scheduler.cpp @@ -250,9 +250,9 @@ void ResourceManager::releaseResource(uint64_t ResourceID) { #ifndef NDEBUG void Scheduler::dump() const { - dbgs() << "[SCHEDULER]: WaitQueue size is: " << WaitQueue.size() << '\n'; - dbgs() << "[SCHEDULER]: ReadyQueue size is: " << ReadyQueue.size() << '\n'; - dbgs() << "[SCHEDULER]: IssuedQueue size is: " << IssuedQueue.size() << '\n'; + dbgs() << "[SCHEDULER]: WaitSet size is: " << WaitSet.size() << '\n'; + dbgs() << "[SCHEDULER]: ReadySet size is: " << ReadySet.size() << '\n'; + dbgs() << "[SCHEDULER]: IssuedSet size is: " << IssuedSet.size() << '\n'; Resources->dump(); } #endif @@ -296,7 +296,7 @@ void Scheduler::issueInstructionImpl( IS->execute(); if (IS->isExecuting()) - IssuedQueue[IR.getSourceIndex()] = IS; + IssuedSet.emplace_back(IR); } // Release the buffered resources and issue the instruction. @@ -308,92 +308,109 @@ void Scheduler::issueInstruction( issueInstructionImpl(IR, UsedResources); } -void Scheduler::promoteToReadyQueue(SmallVectorImpl<InstRef> &Ready) { +void Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) { // Scan the set of waiting instructions and promote them to the // ready queue if operands are all ready. - for (auto I = WaitQueue.begin(), E = WaitQueue.end(); I != E;) { - const unsigned IID = I->first; - Instruction *IS = I->second; + unsigned RemovedElements = 0; + for (auto I = WaitSet.begin(), E = WaitSet.end(); I != E; ) { + InstRef &IR = *I; + if (!IR.isValid()) + break; // Check if this instruction is now ready. In case, force // a transition in state using method 'update()'. - if (!IS->isReady()) - IS->update(); + Instruction &IS = *IR.getInstruction(); + if (!IS.isReady()) + IS.update(); - const InstrDesc &Desc = IS->getDesc(); + const InstrDesc &Desc = IS.getDesc(); bool IsMemOp = Desc.MayLoad || Desc.MayStore; - if (!IS->isReady() || (IsMemOp && !LSU->isReady({IID, IS}))) { + if (!IS.isReady() || (IsMemOp && !LSU->isReady(IR))) { ++I; continue; } - Ready.emplace_back(IID, IS); - ReadyQueue[IID] = IS; - auto ToRemove = I; - ++I; - WaitQueue.erase(ToRemove); + Ready.emplace_back(IR); + ReadySet.emplace_back(IR); + + IR.invalidate(); + ++RemovedElements; + std::iter_swap(I, E - RemovedElements); } + + WaitSet.resize(WaitSet.size() - RemovedElements); } InstRef Scheduler::select() { - // Find the oldest ready-to-issue instruction in the ReadyQueue. - auto It = std::find_if(ReadyQueue.begin(), ReadyQueue.end(), - [&](const QueueEntryTy &Entry) { - const InstrDesc &D = Entry.second->getDesc(); - return Resources->canBeIssued(D); - }); - - if (It == ReadyQueue.end()) - return {0, nullptr}; - - // We want to prioritize older instructions over younger instructions to - // minimize the pressure on the reorder buffer. We also want to - // rank higher the instructions with more users to better expose ILP. - - // Compute a rank value based on the age of an instruction (i.e. its source - // index) and its number of users. The lower the rank value, the better. - int Rank = It->first - It->second->getNumUsers(); - for (auto I = It, E = ReadyQueue.end(); I != E; ++I) { - int CurrentRank = I->first - I->second->getNumUsers(); - if (CurrentRank < Rank) { - const InstrDesc &D = I->second->getDesc(); + unsigned QueueIndex = ReadySet.size(); + int Rank = std::numeric_limits<int>::max(); + + for (unsigned I = 0, E = ReadySet.size(); I != E; ++I) { + const InstRef &IR = ReadySet[I]; + const unsigned IID = IR.getSourceIndex(); + const Instruction &IS = *IR.getInstruction(); + + // Compute a rank value based on the age of an instruction (i.e. its source + // index) and its number of users. The lower the rank value, the better. + int CurrentRank = IID - IS.getNumUsers(); + + // We want to prioritize older instructions over younger instructions to + // minimize the pressure on the reorder buffer. We also want to + // rank higher the instructions with more users to better expose ILP. + if (CurrentRank == Rank) + if (IID > ReadySet[QueueIndex].getSourceIndex()) + continue; + + if (CurrentRank <= Rank) { + const InstrDesc &D = IS.getDesc(); if (Resources->canBeIssued(D)) { Rank = CurrentRank; - It = I; + QueueIndex = I; } } } + if (QueueIndex == ReadySet.size()) + return InstRef(); + // We found an instruction to issue. - InstRef IR(It->first, It->second); - ReadyQueue.erase(It); + + InstRef IR = ReadySet[QueueIndex]; + std::swap(ReadySet[QueueIndex], ReadySet[ReadySet.size() - 1]); + ReadySet.pop_back(); return IR; } void Scheduler::updatePendingQueue(SmallVectorImpl<InstRef> &Ready) { // Notify to instructions in the pending queue that a new cycle just // started. - for (QueueEntryTy Entry : WaitQueue) - Entry.second->cycleEvent(); - promoteToReadyQueue(Ready); + for (InstRef &Entry : WaitSet) + Entry.getInstruction()->cycleEvent(); + promoteToReadySet(Ready); } -void Scheduler::updateIssuedQueue(SmallVectorImpl<InstRef> &Executed) { - for (auto I = IssuedQueue.begin(), E = IssuedQueue.end(); I != E;) { - const QueueEntryTy Entry = *I; - Instruction *IS = Entry.second; - IS->cycleEvent(); - if (IS->isExecuted()) { - Executed.push_back({Entry.first, Entry.second}); - auto ToRemove = I; - ++I; - IssuedQueue.erase(ToRemove); - } else { - LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << Entry.first +void Scheduler::updateIssuedSet(SmallVectorImpl<InstRef> &Executed) { + unsigned RemovedElements = 0; + for (auto I = IssuedSet.begin(), E = IssuedSet.end(); I != E; ) { + InstRef &IR = *I; + if (!IR.isValid()) + break; + Instruction &IS = *IR.getInstruction(); + IS.cycleEvent(); + if (!IS.isExecuted()) { + LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR << " is still executing.\n"); ++I; + continue; } + + Executed.emplace_back(IR); + ++RemovedElements; + IR.invalidate(); + std::iter_swap(I, E - RemovedElements); } + + IssuedSet.resize(IssuedSet.size() - RemovedElements); } void Scheduler::onInstructionExecuted(const InstRef &IR) { @@ -408,9 +425,8 @@ bool Scheduler::reserveResources(InstRef &IR) { // If necessary, reserve queue entries in the load-store unit (LSU). const bool Reserved = LSU->reserve(IR); if (!IR.getInstruction()->isReady() || (Reserved && !LSU->isReady(IR))) { - LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR - << " to the Wait Queue\n"); - WaitQueue[IR.getSourceIndex()] = IR.getInstruction(); + LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n"); + WaitSet.push_back(IR); return false; } return true; @@ -419,9 +435,8 @@ bool Scheduler::reserveResources(InstRef &IR) { bool Scheduler::issueImmediately(InstRef &IR) { const InstrDesc &Desc = IR.getInstruction()->getDesc(); if (!Desc.isZeroLatency() && !Resources->mustIssueImmediately(Desc)) { - LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR - << " to the Ready Queue\n"); - ReadyQueue[IR.getSourceIndex()] = IR.getInstruction(); + LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the ReadySet\n"); + ReadySet.push_back(IR); return false; } return true; diff --git a/tools/llvm-mca/Scheduler.h b/tools/llvm-mca/Scheduler.h index b0d2dbc53c6..8d44f61e32b 100644 --- a/tools/llvm-mca/Scheduler.h +++ b/tools/llvm-mca/Scheduler.h @@ -342,30 +342,28 @@ public: }; // namespace mca /// Class Scheduler is responsible for issuing instructions to pipeline -/// resources. Internally, it delegates to a ResourceManager the management of -/// processor resources. -/// This class is also responsible for tracking the progress of instructions -/// from the dispatch stage, until the write-back stage. +/// resources. /// -/// An nstruction dispatched to the Scheduler is initially placed into either -/// the 'WaitQueue' or the 'ReadyQueue' depending on the availability of the -/// input operands. Instructions in the WaitQueue are ordered by instruction -/// index. An instruction is moved from the WaitQueue to the ReadyQueue when -/// register operands become available, and all memory dependencies are met. -/// Instructions that are moved from the WaitQueue to the ReadyQueue transition -/// from state 'IS_AVAILABLE' to state 'IS_READY'. +/// Internally, it delegates to a ResourceManager the management of processor +/// resources. This class is also responsible for tracking the progress of +/// instructions from the dispatch stage, until the write-back stage. /// -/// At the beginning of each cycle, the Scheduler checks if there are -/// instructions in the WaitQueue that can be moved to the ReadyQueue. If the -/// ReadyQueue is not empty, then older instructions from the queue are issued -/// to the processor pipelines, and the underlying ResourceManager is updated -/// accordingly. The ReadyQueue is ordered by instruction index to guarantee -/// that the first instructions in the set are also the oldest. +/// An instruction dispatched to the Scheduler is initially placed into either +/// the 'WaitSet' or the 'ReadySet' depending on the availability of the input +/// operands. /// -/// An Instruction is moved from the ReadyQueue the `IssuedQueue` when it is -/// issued to a (one or more) pipeline(s). This event also causes an instruction -/// state transition (i.e. from state IS_READY, to state IS_EXECUTING). -/// An Instruction leaves the IssuedQueue when it reaches the write-back stage. +/// An instruction is moved from the WaitSet to the ReadySet when register +/// operands become available, and all memory dependencies are met. +/// Instructions that are moved from the WaitSet to the ReadySet transition +/// in state from 'IS_AVAILABLE' to 'IS_READY'. +/// +/// On every cycle, the Scheduler checks if it can promote instructions from the +/// WaitSet to the ReadySet. +/// +/// An Instruction is moved from the ReadySet the `IssuedSet` when it is issued +/// to a (one or more) pipeline(s). This event also causes an instruction state +/// transition (i.e. from state IS_READY, to state IS_EXECUTING). An Instruction +/// leaves the IssuedSet when it reaches the write-back stage. class Scheduler : public HardwareUnit { const llvm::MCSchedModel &SM; @@ -373,10 +371,9 @@ class Scheduler : public HardwareUnit { std::unique_ptr<ResourceManager> Resources; std::unique_ptr<LSUnit> LSU; - using QueueEntryTy = std::pair<unsigned, Instruction *>; - std::map<unsigned, Instruction *> WaitQueue; - std::map<unsigned, Instruction *> ReadyQueue; - std::map<unsigned, Instruction *> IssuedQueue; + std::vector<InstRef> WaitSet; + std::vector<InstRef> ReadySet; + std::vector<InstRef> IssuedSet; /// Issue an instruction without updating the ready queue. void issueInstructionImpl( @@ -412,7 +409,7 @@ public: /// zero-latency instructions). /// /// Returns true if the instruction is issued immediately. If this does not - /// occur, then the instruction will be added to the Scheduler's ReadyQueue. + /// occur, then the instruction will be added to the Scheduler's ReadySet. bool issueImmediately(InstRef &IR); /// Reserve one entry in each buffered resource. @@ -431,15 +428,15 @@ public: /// once they have been fully consumed. void reclaimSimulatedResources(llvm::SmallVectorImpl<ResourceRef> &Freed); - /// Move instructions from the WaitQueue to the ReadyQueue if input operands + /// Move instructions from the WaitSet to the ReadySet if input operands /// are all available. - void promoteToReadyQueue(llvm::SmallVectorImpl<InstRef> &Ready); + void promoteToReadySet(llvm::SmallVectorImpl<InstRef> &Ready); /// Update the ready queue. void updatePendingQueue(llvm::SmallVectorImpl<InstRef> &Ready); /// Update the issued queue. - void updateIssuedQueue(llvm::SmallVectorImpl<InstRef> &Executed); + void updateIssuedSet(llvm::SmallVectorImpl<InstRef> &Executed); /// Updates the Scheduler's resources to reflect that an instruction has just /// been executed. @@ -456,7 +453,7 @@ public: /// execute the given instruction immediately. bool reserveResources(InstRef &IR); - /// Select the next instruction to issue from the ReadyQueue. + /// Select the next instruction to issue from the ReadySet. /// This method gives priority to older instructions. InstRef select(); @@ -467,10 +464,9 @@ public: // This routine performs a sanity check. This routine should only be called // when we know that 'IR' is not in the scheduler's instruction queues. void sanityCheck(const InstRef &IR) const { - const unsigned Idx = IR.getSourceIndex(); - assert(WaitQueue.find(Idx) == WaitQueue.end()); - assert(ReadyQueue.find(Idx) == ReadyQueue.end()); - assert(IssuedQueue.find(Idx) == IssuedQueue.end()); + assert(llvm::find(WaitSet, IR) == WaitSet.end()); + assert(llvm::find(ReadySet, IR) == ReadySet.end()); + assert(llvm::find(IssuedSet, IR) == IssuedSet.end()); } #endif // !NDEBUG }; |