aboutsummaryrefslogtreecommitdiff
path: root/src/llvmopencl/WorkitemHandler.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/llvmopencl/WorkitemHandler.cc')
-rw-r--r--src/llvmopencl/WorkitemHandler.cc278
1 files changed, 278 insertions, 0 deletions
diff --git a/src/llvmopencl/WorkitemHandler.cc b/src/llvmopencl/WorkitemHandler.cc
new file mode 100644
index 0000000..90ed294
--- /dev/null
+++ b/src/llvmopencl/WorkitemHandler.cc
@@ -0,0 +1,278 @@
+// LLVM function pass to replicate the kernel body for all work items
+// in a work group.
+//
+// Copyright (c) 2011-2012 Carlos Sánchez de La Lama / URJC and
+// Pekka Jääskeläinen / TUT
+// Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "config.h"
+#include <sstream>
+#include <iostream>
+
+#if (defined LLVM_3_1 or defined LLVM_3_2)
+#include "llvm/Metadata.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/ValueSymbolTable.h"
+#else
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/ValueSymbolTable.h"
+#endif
+#include "llvm/Support/CommandLine.h"
+#include "WorkitemHandler.h"
+#include "Kernel.h"
+
+//#define DEBUG_REFERENCE_FIXING
+
+namespace pocl {
+
+using namespace llvm;
+
+cl::opt<bool>
+AddWIMetadata("add-wi-metadata", cl::init(false), cl::Hidden,
+ cl::desc("Adds a work item identifier to each of the instruction in work items."));
+
+
+WorkitemHandler::WorkitemHandler(char& ID) : FunctionPass(ID)
+{
+}
+
+bool
+WorkitemHandler::runOnFunction(Function &F)
+{
+ return false;
+}
+
+void
+WorkitemHandler::Initialize(Kernel *K)
+{
+ llvm::Module *M = K->getParent();
+
+ LocalSizeX = 3;
+ LocalSizeY = 1;
+ LocalSizeZ = 1;
+
+// TODO: are we searching reqd_workgroup_size here? If so, we need to enforce it.
+ llvm::NamedMDNode *size_info = M->getNamedMetadata("opencl.kernel_wg_size_info");
+ if (size_info) {
+ for (unsigned i = 0, e = size_info->getNumOperands(); i != e; ++i) {
+ llvm::MDNode *KernelSizeInfo = size_info->getOperand(i);
+ if (KernelSizeInfo->getOperand(0) == K) {
+ LocalSizeX = (llvm::cast<ConstantInt>(KernelSizeInfo->getOperand(1)))->getLimitedValue();
+ LocalSizeY = (llvm::cast<ConstantInt>(KernelSizeInfo->getOperand(2)))->getLimitedValue();
+ LocalSizeZ = (llvm::cast<ConstantInt>(KernelSizeInfo->getOperand(3)))->getLimitedValue();
+ }
+ }
+ }
+
+ llvm::Type *localIdType;
+ if (M->getPointerSize() == llvm::Module::Pointer64)
+ size_t_width = 64;
+ else if (M->getPointerSize() == llvm::Module::Pointer32)
+ size_t_width = 32;
+ else
+ assert (false && "Only 32 and 64 bit size_t widths supported.");
+
+ localIdType = IntegerType::get(K->getContext(), size_t_width);
+
+ localIdZ = M->getOrInsertGlobal(POCL_LOCAL_ID_Z_GLOBAL, localIdType);
+ localIdY = M->getOrInsertGlobal(POCL_LOCAL_ID_Y_GLOBAL, localIdType);
+ localIdX = M->getOrInsertGlobal(POCL_LOCAL_ID_X_GLOBAL, localIdType);
+
+ GlobalVariable *gvx = M->getNamedGlobal(POCL_LOCAL_ID_X_GLOBAL);
+ GlobalVariable *gvy = M->getNamedGlobal(POCL_LOCAL_ID_Y_GLOBAL);
+ GlobalVariable *gvz = M->getNamedGlobal(POCL_LOCAL_ID_Z_GLOBAL);
+ gvx->setSection(StringRef("far"));
+ gvy->setSection(StringRef("far"));
+ gvz->setSection(StringRef("far"));
+
+ //Value *lsx = M->getOrInsertGlobal("_local_size_x", localIdType);
+ //Value *lsy = M->getOrInsertGlobal("_local_size_y", localIdType);
+ //Value *lsz = M->getOrInsertGlobal("_local_size_z", localIdType);
+ //GlobalVariable *gsx = M->getNamedGlobal("_local_size_x");
+ //GlobalVariable *gsy = M->getNamedGlobal("_local_size_y");
+ //GlobalVariable *gsz = M->getNamedGlobal("_local_size_z");
+ //gsx->setSection(StringRef("far"));
+ //gsy->setSection(StringRef("far"));
+ //gsz->setSection(StringRef("far"));
+}
+
+bool
+WorkitemHandler::dominatesUse
+(llvm::DominatorTree *DT, Instruction &I, unsigned i) {
+ Instruction *Op = cast<Instruction>(I.getOperand(i));
+ BasicBlock *OpBlock = Op->getParent();
+ PHINode *PN = dyn_cast<PHINode>(&I);
+
+ // DT can handle non phi instructions for us.
+ if (!PN)
+ {
+ // Definition must dominate use unless use is unreachable!
+ return Op->getParent() == I.getParent() ||
+ DT->dominates(Op, &I);
+ }
+
+ // PHI nodes are more difficult than other nodes because they actually
+ // "use" the value in the predecessor basic blocks they correspond to.
+ unsigned j = PHINode::getIncomingValueNumForOperand(i);
+ BasicBlock *PredBB = PN->getIncomingBlock(j);
+ return (PredBB && DT->dominates(OpBlock, PredBB));
+}
+
+/* Fixes the undominated variable uses.
+
+ These appear when a conditional barrier kernel is replicated to
+ form a copy of the *same basic block* in the alternative
+ "barrier path".
+
+ E.g., from
+
+ A -> [exit], A -> B -> [exit]
+
+ a replicated CFG as follows, is created:
+
+ A1 -> (T) A2 -> [exit1], A1 -> (F) A2' -> B1, B2 -> [exit2]
+
+ The regions are correct because of the barrier semantics
+ of "all or none". In case any barrier enters the [exit1]
+ from A1, all must (because there's a barrier in the else
+ branch).
+
+ Here at A2 and A2' one creates the same variables.
+ However, B2 does not know which copy
+ to refer to, the ones created in A2 or ones in A2' (correct).
+ The mapping data contains only one possibility, the
+ one that was placed there last. Thus, the instructions in B2
+ might end up referring to the variables defined in A2
+ which do not nominate them.
+
+ The variable references are fixed by exploiting the knowledge
+ of the naming convention of the cloned variables.
+
+ One potential alternative way would be to collect the refmaps per BB,
+ not globally. Then as a final phase traverse through the
+ basic blocks starting from the beginning and propagating the
+ reference data downwards, the data from the new BB overwriting
+ the old one. This should ensure the reachability without
+ the costly dominance analysis.
+*/
+bool
+WorkitemHandler::fixUndominatedVariableUses(llvm::DominatorTree *DT,
+ llvm::Function &F)
+{
+ bool changed = false;
+ DT->runOnFunction(F);
+
+ for (Function::iterator i = F.begin(), e = F.end(); i != e; ++i)
+ {
+ llvm::BasicBlock *bb = i;
+ for (llvm::BasicBlock::iterator ins = bb->begin(), inse = bb->end();
+ ins != inse; ++ins)
+ {
+ for (unsigned opr = 0; opr < ins->getNumOperands(); ++opr)
+ {
+ if (!isa<Instruction>(ins->getOperand(opr))) continue;
+ Instruction *operand = cast<Instruction>(ins->getOperand(opr));
+ if (dominatesUse(DT, *ins, opr))
+ continue;
+#ifdef DEBUG_REFERENCE_FIXING
+ std::cout << "### dominance error!" << std::endl;
+ operand->dump();
+ std::cout << "### does not dominate:" << std::endl;
+ ins->dump();
+#endif
+ StringRef baseName;
+ std::pair< StringRef, StringRef > pieces =
+ operand->getName().rsplit('.');
+ if (pieces.second.startswith("pocl_"))
+ baseName = pieces.first;
+ else
+ baseName = operand->getName();
+
+ Value *alternative = NULL;
+
+ unsigned int copy_i = 0;
+ do {
+ std::ostringstream alternativeName;
+ alternativeName << baseName.str();
+ if (copy_i > 0)
+ alternativeName << ".pocl_" << copy_i;
+
+ alternative =
+ F.getValueSymbolTable().lookup(alternativeName.str());
+
+ if (alternative != NULL)
+ {
+ ins->setOperand(opr, alternative);
+ if (dominatesUse(DT, *ins, opr))
+ break;
+ }
+
+ if (copy_i > 10000 && alternative == NULL)
+ break; /* ran out of possibilities */
+ ++copy_i;
+ } while (true);
+
+ if (alternative != NULL)
+ {
+#ifdef DEBUG_REFERENCE_FIXING
+ std::cout << "### found the alternative:" << std::endl;
+ alternative->dump();
+#endif
+ changed |= true;
+ } else {
+#ifdef DEBUG_REFERENCE_FIXING
+ std::cout << "### didn't fiund an alternative for" << std::endl;
+ operand->dump();
+ std::cerr << "### BB:" << std::endl;
+ operand->getParent()->dump();
+ std::cerr << "### the user BB:" << std::endl;
+ ins->getParent()->dump();
+#endif
+ std::cerr << "Could not find a dominating alternative variable." << std::endl;
+ abort();
+ }
+ }
+ }
+ }
+ return changed;
+}
+
+/**
+ * Moves the phi nodes in the beginning of the src to the beginning of
+ * the dst.
+ *
+ * MergeBlockIntoPredecessor function from llvm discards the phi nodes
+ * of the replicated BB because it has only one entry.
+ */
+void
+WorkitemHandler::movePhiNodes(llvm::BasicBlock* src, llvm::BasicBlock* dst)
+{
+ while (PHINode *PN = dyn_cast<PHINode>(src->begin()))
+ PN->moveBefore(dst->getFirstNonPHI());
+}
+
+
+} // namespace pocl