summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tom Stellard <thomas.stellard@amd.com> 2013-09-05 08:58:51 -0700
committer Tom Stellard <thomas.stellard@amd.com> 2013-09-12 14:57:35 -0700
commit 33be3a492e7dcb3ee5a2faa2929f24e268c1bbe0 (patch)
tree 4ce1ff4562bfe46b6d340586c6269c45d9c10cc3
parent f3e6ae0e79997724e669ae02eb1c7b37130d066c (diff)
XXX: Barrier CFG
-rw-r--r-- lib/Target/R600/AMDGPU.h 1
-rw-r--r-- lib/Target/R600/AMDGPUBarrierCFGFixer.cpp 272
-rw-r--r-- lib/Target/R600/AMDGPUTargetMachine.cpp 4
-rw-r--r-- lib/Target/R600/R600ExpandSpecialInstrs.cpp 1
4 files changed, 277 insertions, 1 deletions
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index e2d1caf8676..a71a33cad1a 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -43,6 +43,7 @@ FunctionPass *createSIInsertWaits(TargetMachine &tm);
// Passes common to R600 and SI
Pass *createAMDGPUStructurizeCFGPass();
+FunctionPass *createAMDGPUBarrierCFGFixer();
FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
FunctionPass *createAMDGPUIndirectAddressingPass(TargetMachine &tm);
FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
diff --git a/lib/Target/R600/AMDGPUBarrierCFGFixer.cpp b/lib/Target/R600/AMDGPUBarrierCFGFixer.cpp
new file mode 100644
index 00000000000..025a878b892
--- /dev/null
+++ b/lib/Target/R600/AMDGPUBarrierCFGFixer.cpp
@@ -0,0 +1,272 @@
+
+#define DEBUG_TYPE "amdgpubarriercfg"
+
+#include "AMDGPU.h"
+
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/InstVisitor.h"
+
+namespace llvm {
+ void initializeAMDGPUBarrierCFGFixerPass(PassRegistry&);
+}
+
+using namespace llvm;
+
+namespace {
+
+/// Function pass that rewrites the CFG around AMDGPU barrier calls.
+///
+/// The class is both a FunctionPass and an InstVisitor: visitCallInst()
+/// collects every basic block that contains a barrier call into
+/// BarrierBlocks, and runOnFunction() then uses splitBarrierBlock() to route
+/// branches that bypass such a block through the barrier.
+class AMDGPUBarrierCFGFixer : public FunctionPass,
+                              public InstVisitor<AMDGPUBarrierCFGFixer> {
+
+  // Analysis results; null until runOnFunction() fetches them.
+  DominatorTree *DT;
+  PostDominatorTree *PDT;
+  LoopInfo *LI;
+
+  // Blocks found by visitCallInst() to contain a barrier call.
+  std::list<BasicBlock*> BarrierBlocks;
+
+  // True if the call targets a function named "llvm.AMDGPU.barrier*".
+  bool isBarrierInst(const CallInst &I) const;
+
+  // Splits BarrierBlock around its barrier call and redirects BB's branch so
+  // that it reaches the barrier as well.
+  void splitBarrierBlock(BasicBlock *BarrierBlock, BasicBlock *BB);
+
+public:
+  // The analysis pointers are explicitly nulled so that they never hold
+  // indeterminate values before the first runOnFunction() call.
+  AMDGPUBarrierCFGFixer() : FunctionPass(ID), DT(0), PDT(0), LI(0) {
+    initializeAMDGPUBarrierCFGFixerPass(*PassRegistry::getPassRegistry());
+  }
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+  virtual bool runOnFunction(Function &F);
+  virtual const char *getPassName() const {
+    return "AMDGPU Barrier CFG Fixer";
+  }
+
+  // InstVisitor hook, invoked for every call instruction by visit().
+  void visitCallInst(CallInst &I);
+
+  static char ID;
+};
+
+} // End anonymous namespace
+
+char AMDGPUBarrierCFGFixer::ID = 0;
+
+// Register the pass and the analyses it depends on. The pass argument is
+// spelled identically in both macros and matches DEBUG_TYPE
+// ("amdgpubarriercfg"); it was previously misspelled in two different ways.
+INITIALIZE_PASS_BEGIN(AMDGPUBarrierCFGFixer, "amdgpubarriercfg",
+                      "Legalize use of barrier instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
+INITIALIZE_PASS_END(AMDGPUBarrierCFGFixer, "amdgpubarriercfg",
+                    "Legalize use of barrier instructions", false, false)
+
+/// Declare the analyses this pass reads: the post-dominator tree, the
+/// dominator tree and loop info, requested in that order.
+void AMDGPUBarrierCFGFixer::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<PostDominatorTree>()
+    .addRequired<DominatorTree>()
+    .addRequired<LoopInfo>();
+}
+
+/// Return true if \p I is a direct call to a function whose name starts with
+/// "llvm.AMDGPU.barrier".
+bool AMDGPUBarrierCFGFixer::isBarrierInst(const CallInst &I) const {
+  // getCalledFunction() returns null for indirect calls; such a call cannot
+  // be identified as a barrier, so report false instead of dereferencing null.
+  const Function *F = I.getCalledFunction();
+  return F && F->getName().startswith("llvm.AMDGPU.barrier");
+}
+
+/// InstVisitor callback: record the parent block of every barrier call.
+void AMDGPUBarrierCFGFixer::visitCallInst(CallInst &I) {
+  if (!isBarrierInst(I))
+    return;
+
+  BasicBlock *Parent = I.getParent();
+  BarrierBlocks.push_back(Parent);
+}
+
+/// Split \p BarrierBlock around its first barrier call and make \p BB branch
+/// to the barrier as well.
+///
+/// \p BB must end in a conditional branch. The successor of \p BB that is
+/// not post-dominated by \p BarrierBlock (the "non-barrier successor") is
+/// replaced by the new block holding the barrier; that block then branches,
+/// on BB's condition, either to the non-barrier successor or to the rest of
+/// the original barrier block. Values that no longer dominate their uses
+/// after the rewrite are merged through PHI nodes whose incoming value from
+/// \p BB is undef. Finally \p BarrierBlock is removed from BarrierBlocks.
+void AMDGPUBarrierCFGFixer::splitBarrierBlock(BasicBlock *BarrierBlock,
+                                              BasicBlock *BB) {
+  DEBUG(dbgs() << "Splitting barrier block " << BarrierBlock->getName() <<
+        " for " << BB->getName() << "\n");
+
+  // Find the location of the barrier instruction in the barrier block.
+  BasicBlock::iterator BarrierI = BarrierBlock->begin();
+  BasicBlock::iterator BarrierE = BarrierBlock->end();
+  for (; BarrierI != BarrierE; ++BarrierI) {
+    const CallInst *Call = dyn_cast<CallInst>(BarrierI);
+    if (Call && isBarrierInst(*Call)) {
+      break;
+    }
+  }
+  assert(BarrierI != BarrierE && "Barrier block contains no barrier call");
+
+  // Split the barrier block into three blocks
+  // Before:
+  // BB0:
+  //   <Some Instructions A>
+  //   Barrier
+  //   <Some Instructions B>
+  //
+  // After:
+  // BB0 (BarrierA):
+  //   <Some Instructions A>
+  //   br BB1
+  // BB1 (BarrierB):
+  //   Barrier
+  //   br BB2
+  // BB2 (BarrierC):
+  //   <Some Instructions B>
+  BasicBlock *BarrierA = BarrierBlock;
+  BasicBlock *BarrierB =
+      BarrierA->splitBasicBlock(BarrierI, BarrierBlock->getName() + ".barrier");
+  BasicBlock *BarrierC =
+      BarrierB->splitBasicBlock(++BarrierB->begin(),
+                                BarrierBlock->getName() + ".end");
+
+  // Rewrite BB's branch instruction so that its non-barrier successor is
+  // replaced by BarrierB.
+  BranchInst *Branch = cast<BranchInst>(BB->getTerminator());
+  assert(Branch->isConditional() && "BB must end in a conditional branch");
+  bool NonBarrierTrue = false;
+  BasicBlock *BBNonBarrierSucc = Branch->getSuccessor(0);
+  if (PDT->dominates(BarrierA, BBNonBarrierSucc)) {
+    // Successor 0 leads to the barrier, so successor 1 is the one to redirect.
+    BBNonBarrierSucc = Branch->getSuccessor(1);
+    Branch->setSuccessor(1, BarrierB);
+    NonBarrierTrue = true;
+  } else {
+    Branch->setSuccessor(0, BarrierB);
+  }
+
+  // Replace BarrierB's unconditional branch to BarrierC with a conditional
+  // branch on BB's condition.
+  // NOTE(review): NonBarrierTrue is set when the non-barrier successor was
+  // BB's *false* destination, yet it is then used as the *true* destination
+  // here. This looks inverted; the original behaviour is kept - please
+  // confirm the intended successor order.
+  Instruction *TermInsert = BarrierB->getTerminator();
+  IRBuilder<> BrBuilder(TermInsert);
+  BrBuilder.CreateCondBr(Branch->getCondition(),
+                         NonBarrierTrue ? BBNonBarrierSucc : BarrierC,
+                         NonBarrierTrue ? BarrierC : BBNonBarrierSucc);
+  TermInsert->eraseFromParent();
+
+  // The CFG changed, so recompute the dominator tree before querying it.
+  DT->runOnFunction(*BB->getParent());
+
+  // Update the PHI nodes of the non-barrier successor: the edge that used to
+  // come from BB now comes from BarrierB. Only the leading PHI nodes are
+  // visited, so a non-PHI instruction ends the walk instead of tripping an
+  // assertion.
+  for (BasicBlock::iterator I = BBNonBarrierSucc->begin();
+       PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+    int Idx = PN->getBasicBlockIndex(BB);
+    if (Idx < 0) {
+      continue;
+    }
+    PN->setIncomingBlock(Idx, BarrierB);
+  }
+
+  // New PHI nodes are inserted in BarrierB, in front of the barrier call.
+  IRBuilder<> BarrierPhiBuilder(BarrierB->getFirstNonPHI());
+
+  // Operands of instructions in BarrierC whose defining block no longer
+  // dominates BarrierC are live-ins that must be merged in BarrierB.
+  for (BasicBlock::iterator I = BarrierC->begin(), E = BarrierC->end();
+       I != E; ++I) {
+    Instruction *Inst = &*I;
+
+    for (unsigned i = 0; i < Inst->getNumOperands(); ++i) {
+      Instruction *Def = dyn_cast<Instruction>(Inst->getOperand(i));
+      if (!Def || DT->dominates(Def->getParent(), BarrierC)) {
+        continue;
+      }
+      DEBUG(dbgs() << "Adding live in for BarrierC:\n");
+      DEBUG(Def->print(dbgs()));
+      DEBUG(dbgs() << "\n");
+      PHINode *Phi = BarrierPhiBuilder.CreatePHI(Def->getType(), 2);
+      Phi->addIncoming(Def, BarrierA);
+      Phi->addIncoming(UndefValue::get(Def->getType()), BB);
+      Inst->setOperand(i, Phi);
+    }
+  }
+
+  // Values defined in BarrierA that are used somewhere BarrierA no longer
+  // dominates also need a PHI in BarrierB.
+  for (BasicBlock::iterator I = BarrierA->begin(), E = BarrierA->end();
+       I != E; ++I) {
+    for (Value::use_iterator U = I->use_begin(); U != I->use_end();) {
+      // Take the Use and advance before rewriting it: changing the operand
+      // unlinks this Use from I's use list. Only this single Use is rewritten
+      // (not every operand of the user), so the advanced iterator can never
+      // end up pointing at a Use that was moved to another value's list.
+      Use &TheUse = U.getUse();
+      ++U;
+      Instruction *UserInst = dyn_cast<Instruction>(TheUse.getUser());
+      if (!UserInst) {
+        continue;
+      }
+      // A PHI operand is used at the end of its incoming block, not in the
+      // block that holds the PHI. Without this, the PHIs created above (with
+      // an incoming value from BarrierA) would themselves be rewritten.
+      BasicBlock *UseBB = UserInst->getParent();
+      if (PHINode *UserPhi = dyn_cast<PHINode>(UserInst)) {
+        UseBB = UserPhi->getIncomingBlock(TheUse);
+      }
+      if (DT->dominates(BarrierA, UseBB)) {
+        continue;
+      }
+      PHINode *Phi = BarrierPhiBuilder.CreatePHI(I->getType(), 2);
+      Phi->addIncoming(I, BarrierA);
+      Phi->addIncoming(UndefValue::get(I->getType()), BB);
+      TheUse.set(Phi);
+    }
+  }
+
+  // The barrier now lives in BarrierB; the original block no longer has one.
+  BarrierBlocks.remove(BarrierA);
+}
+
+/// Repeatedly look for a block BB with a successor that is post-dominated by
+/// a barrier block while BB itself is not, and split that barrier block so
+/// that BB's other path reaches the barrier too. Barrier blocks inside loops
+/// (loop depth > 0) are skipped.
+///
+/// \returns true if the function was modified.
+bool AMDGPUBarrierCFGFixer::runOnFunction(Function &F) {
+
+  PDT = &getAnalysis<PostDominatorTree>();
+  DT = &getAnalysis<DominatorTree>();
+  LI = &getAnalysis<LoopInfo>();
+
+  // Modified accumulates over all iterations; Changed only drives the
+  // fixed-point loop and is reset on every round.
+  bool Modified = false;
+  bool Changed = true;
+
+  while (Changed) {
+    Changed = false;
+    // First pass find all the blocks containing a barrier instruction.
+    visit(F);
+
+    if (BarrierBlocks.empty()) {
+      break;
+    }
+
+    DEBUG(dbgs() << "There are " << BarrierBlocks.size() << " barrier blocks\n");
+
+    // Second pass: find branches that bypass a barrier block and fix them.
+    for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+      BasicBlock *BB = FI;
+      for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) {
+        BasicBlock *SuccBB = *SI;
+        bool ModifiedSucc = false;
+        for (std::list<BasicBlock*>::const_iterator BBI = BarrierBlocks.begin();
+             BBI != BarrierBlocks.end();) {
+          BasicBlock *BarrierBlock = *BBI;
+          // Advance first: splitBarrierBlock() removes BarrierBlock from the
+          // list.
+          ++BBI;
+          if (LI->getLoopDepth(BarrierBlock) > 0) {
+            continue;
+          }
+          if (PDT->dominates(BarrierBlock, SuccBB) &&
+              !PDT->dominates(BarrierBlock, BB)) {
+            DEBUG(dbgs() << "Succesor block: " << SuccBB->getName() << " meets criteria.\n");
+            splitBarrierBlock(BarrierBlock, BB);
+            Changed = true;
+            Modified = true;
+            ModifiedSucc = true;
+            // The CFG changed; refresh the post-dominator tree.
+            PDT->runOnFunction(F);
+            break;
+          }
+        }
+        // BB's terminator was rewritten, so stop walking its successors.
+        if (ModifiedSucc) {
+          break;
+        }
+      }
+    }
+    BarrierBlocks.clear();
+    PDT->runOnFunction(F);
+  }
+  // Report IR changes to the pass manager; this previously always returned
+  // false, even after the CFG had been rewritten.
+  return Modified;
+}
+
+/// Factory: returns a newly heap-allocated AMDGPUBarrierCFGFixer pass.
+FunctionPass *llvm::createAMDGPUBarrierCFGFixer() {
+  AMDGPUBarrierCFGFixer *Fixer = new AMDGPUBarrierCFGFixer();
+  return Fixer;
+}
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index a583f64d177..6130ff4f5c3 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -119,7 +119,9 @@ void AMDGPUTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
bool
AMDGPUPassConfig::addPreISel() {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
-// addPass(createFlattenCFGPass());
+ addPass(createAMDGPUBarrierCFGFixer());
+ addPass(createCFGSimplificationPass());
+ addPass(createFlattenCFGPass());
addPass(createStructurizeCFGPass());
if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
addPass(createSITypeRewriter());
diff --git a/lib/Target/R600/R600ExpandSpecialInstrs.cpp b/lib/Target/R600/R600ExpandSpecialInstrs.cpp
index 67b42d704f7..09427227b87 100644
--- a/lib/Target/R600/R600ExpandSpecialInstrs.cpp
+++ b/lib/Target/R600/R600ExpandSpecialInstrs.cpp
@@ -70,6 +70,7 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
switch (MI.getOpcode()) {
default: break;
+ case AMDGPU::IMPLICIT_DEF: MI.eraseFromParent(); break;
// Expand PRED_X to one of the PRED_SET instructions.
case AMDGPU::PRED_X: {
uint64_t Flags = MI.getOperand(3).getImm();