summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/X86/X86TargetMachine.cpp
diff options
context:
space:
mode:
authorLama Saba <lama.saba@intel.com>2018-04-02 13:48:28 +0000
committerLama Saba <lama.saba@intel.com>2018-04-02 13:48:28 +0000
commitdfcd83999880349834e2635d89b4656dd7176910 (patch)
tree70b924ef7c84a200c2ec2ba4d63971314aff2a25 /llvm/lib/Target/X86/X86TargetMachine.cpp
parent6686edcb6c2d354bc04badc3e8ac6420d93f97fd (diff)
[X86] Reduce Store Forward Block issues in HW - Recommit after fixing Bug 36346
If a load follows a store and reloads data that the store has written to memory, Intel microarchitectures can in many cases forward the data directly from the store to the load. This "store forwarding" saves cycles by enabling the load to obtain the data directly instead of accessing it from cache or memory. A "store forward block" occurs in cases where a store cannot be forwarded to the load. The most typical case of a store forward block on the Intel Core microarchitecture is that a small store cannot be forwarded to a large load. The estimated penalty for a store forward block is ~13 cycles. This pass tries to recognize and handle cases where a "store forward block" is created by the compiler when lowering memcpy calls to a sequence of a load and a store. The pass currently only handles cases where memcpy is lowered to XMM/YMM registers; it tries to break the memcpy into smaller copies. Breaking the memcpy should be possible since there is no atomicity guarantee for loads and stores to XMM/YMM. Differential revision: https://reviews.llvm.org/D41330 Change-Id: Ib48836ccdf6005989f7d4466fa2035b7b04415d9
Diffstat (limited to 'llvm/lib/Target/X86/X86TargetMachine.cpp')
-rw-r--r--llvm/lib/Target/X86/X86TargetMachine.cpp3
1 files changed, 3 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index d0841ba2df0..c93207555c4 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -62,6 +62,7 @@ void initializeX86CallFrameOptimizationPass(PassRegistry &);
void initializeX86CmovConverterPassPass(PassRegistry &);
void initializeX86ExecutionDomainFixPass(PassRegistry &);
void initializeX86DomainReassignmentPass(PassRegistry &);
+void initializeX86AvoidSFBPassPass(PassRegistry &);
} // end namespace llvm
@@ -80,6 +81,7 @@ extern "C" void LLVMInitializeX86Target() {
initializeX86CmovConverterPassPass(PR);
initializeX86ExecutionDomainFixPass(PR);
initializeX86DomainReassignmentPass(PR);
+ initializeX86AvoidSFBPassPass(PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -449,6 +451,7 @@ void X86PassConfig::addPreRegAlloc() {
addPass(createX86FixupSetCC());
addPass(createX86OptimizeLEAs());
addPass(createX86CallFrameOptimization());
+ addPass(createX86AvoidStoreForwardingBlocks());
}
addPass(createX86WinAllocaExpander());