Diffstat (limited to 'lib/Target/AMDIL')
-rw-r--r--   lib/Target/AMDIL/AMDGPUConvertToISA.cpp   65
-rw-r--r--   lib/Target/AMDIL/AMDGPULowerInstructions.cpp   82
-rw-r--r--   lib/Target/AMDIL/AMDGPULowerShaderInstructions.cpp   38
-rw-r--r--   lib/Target/AMDIL/AMDGPULowerShaderInstructions.h   40
-rw-r--r--   lib/Target/AMDIL/AMDGPUReorderPreloadInstructions.cpp   66
-rw-r--r--   lib/Target/AMDIL/AMDILAlgorithms.tpp   93
-rw-r--r--   lib/Target/AMDIL/AMDILCFGStructurizer.cpp   3250
-rw-r--r--   lib/Target/AMDIL/AMDILCodeEmitter.h   46
-rw-r--r--   lib/Target/AMDIL/AMDILMCCodeEmitter.cpp   158
-rw-r--r--   lib/Target/AMDIL/AMDILMachinePeephole.cpp   173
-rw-r--r--   lib/Target/AMDIL/AMDILPeepholeOptimizer.cpp   1138
-rw-r--r--   lib/Target/AMDIL/R600CodeEmitter.cpp   749
-rw-r--r--   lib/Target/AMDIL/R600KernelParameters.cpp   503
-rw-r--r--   lib/Target/AMDIL/R600KernelParameters.h   28
-rw-r--r--   lib/Target/AMDIL/R600LowerInstructions.cpp   502
-rw-r--r--   lib/Target/AMDIL/R600LowerShaderInstructions.cpp   143
-rw-r--r--   lib/Target/AMDIL/R600OpenCLUtils.h   49
-rw-r--r--   lib/Target/AMDIL/SIAssignInterpRegs.cpp   110
-rw-r--r--   lib/Target/AMDIL/SICodeEmitter.cpp   274
-rw-r--r--   lib/Target/AMDIL/SILowerShaderInstructions.cpp   90
-rw-r--r--   lib/Target/AMDIL/SIPropagateImmReads.cpp   70
21 files changed, 7667 insertions, 0 deletions
diff --git a/lib/Target/AMDIL/AMDGPUConvertToISA.cpp b/lib/Target/AMDIL/AMDGPUConvertToISA.cpp
new file mode 100644
index 00000000000..ce947f8ff78
--- /dev/null
+++ b/lib/Target/AMDIL/AMDGPUConvertToISA.cpp
@@ -0,0 +1,65 @@
+//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers AMDIL machine instructions to the appropriate hardware
+// instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+using namespace llvm;
+
+namespace {
+ class AMDGPUConvertToISAPass : public MachineFunctionPass {
+
+ private:
+ static char ID;
+ TargetMachine &TM;
+
+ void lowerFLT(MachineInstr &MI);
+
+ public:
+ AMDGPUConvertToISAPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TM(tm) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ };
+} /* End anonymous namespace */
+
+char AMDGPUConvertToISAPass::ID = 0;
+
+FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
+ return new AMDGPUConvertToISAPass(tm);
+}
+
+bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF)
+{
+ const AMDGPUInstrInfo * TII =
+ static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+ I != MBB.end(); I = Next, Next = llvm::next(I) ) {
+ MachineInstr &MI = *I;
+ MachineInstr * newInstr = TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
+ if (!newInstr) {
+ continue;
+ }
+ MBB.insert(I, newInstr);
+ MI.eraseFromParent();
+ }
+ }
+ return false;
+}
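+
+// Illustrative usage (a rough sketch; a legacy PassManagerBase named PM and a
+// TargetMachine named TM are assumed here and do not appear in this file):
+//
+//   PM.add(llvm::createAMDGPUConvertToISAPass(TM));
+//
+// The pass then walks every MachineInstr, asks AMDGPUInstrInfo::convertToISA()
+// for a hardware replacement, inserts that replacement before the original
+// instruction, and erases the original.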
diff --git a/lib/Target/AMDIL/AMDGPULowerInstructions.cpp b/lib/Target/AMDIL/AMDGPULowerInstructions.cpp
new file mode 100644
index 00000000000..b49d0dddf65
--- /dev/null
+++ b/lib/Target/AMDIL/AMDGPULowerInstructions.cpp
@@ -0,0 +1,82 @@
+//===-- AMDGPULowerInstructions.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "AMDGPU.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDIL.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+ class AMDGPULowerInstructionsPass : public MachineFunctionPass {
+
+ private:
+ static char ID;
+ TargetMachine &TM;
+ void lowerVCREATE_v4f32(MachineInstr &MI, MachineBasicBlock::iterator I,
+ MachineBasicBlock &MBB, MachineFunction &MF);
+
+ public:
+ AMDGPULowerInstructionsPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TM(tm) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ };
+} /* End anonymous namespace */
+
+char AMDGPULowerInstructionsPass::ID = 0;
+
+FunctionPass *llvm::createAMDGPULowerInstructionsPass(TargetMachine &tm) {
+ return new AMDGPULowerInstructionsPass(tm);
+}
+
+bool AMDGPULowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
+{
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+ I != MBB.end(); I = Next, Next = llvm::next(I) ) {
+ MachineInstr &MI = *I;
+
+ switch (MI.getOpcode()) {
+ default: continue;
+ case AMDIL::VCREATE_v4f32: lowerVCREATE_v4f32(MI, I, MBB, MF); break;
+
+ }
+ MI.eraseFromParent();
+ }
+ }
+ return false;
+}
+
+void AMDGPULowerInstructionsPass::lowerVCREATE_v4f32(MachineInstr &MI,
+ MachineBasicBlock::iterator I, MachineBasicBlock &MBB, MachineFunction &MF)
+{
+ MachineRegisterInfo & MRI = MF.getRegInfo();
+ unsigned tmp = MRI.createVirtualRegister(
+ MRI.getRegClass(MI.getOperand(0).getReg()));
+
+ BuildMI(MBB, I, DebugLoc(), TM.getInstrInfo()->get(AMDIL::IMPLICIT_DEF), tmp);
+
+ BuildMI(MBB, I, DebugLoc(), TM.getInstrInfo()->get(AMDIL::INSERT_SUBREG))
+ .addOperand(MI.getOperand(0))
+ .addReg(tmp)
+ .addOperand(MI.getOperand(1))
+ .addImm(AMDIL::sel_x);
+}
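+
+// Roughly speaking (the virtual register names below are purely illustrative),
+// the lowering above rewrites
+//
+//   %dst = VCREATE_v4f32 %src
+//
+// into
+//
+//   %tmp = IMPLICIT_DEF
+//   %dst = INSERT_SUBREG %tmp, %src, sel_x
+//
+// i.e. the scalar source is inserted into the x channel of an otherwise
+// undefined v4f32 value.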
diff --git a/lib/Target/AMDIL/AMDGPULowerShaderInstructions.cpp b/lib/Target/AMDIL/AMDGPULowerShaderInstructions.cpp
new file mode 100644
index 00000000000..d33055ccb87
--- /dev/null
+++ b/lib/Target/AMDIL/AMDGPULowerShaderInstructions.cpp
@@ -0,0 +1,38 @@
+//===-- AMDGPULowerShaderInstructions.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "AMDGPULowerShaderInstructions.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+void AMDGPULowerShaderInstructionsPass::preloadRegister(MachineFunction * MF,
+ const TargetInstrInfo * TII, unsigned physReg, unsigned virtReg) const
+{
+ if (!MRI->isLiveIn(physReg)) {
+ MRI->addLiveIn(physReg, virtReg);
+ MachineBasicBlock &EntryMBB = MF->front();
+ BuildMI(MF->front(), EntryMBB.begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+ virtReg)
+ .addReg(physReg);
+ } else {
+ /* We can't mark the same register as preloaded twice, but we still must
+ * associate virtReg with the correct preloaded register. */
+ unsigned newReg = MRI->getLiveInVirtReg(physReg);
+ MRI->replaceRegWith(virtReg, newReg);
+ }
+}
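+
+// Hypothetical call site (PhysReg and VirtReg are placeholders; the real
+// callers are the target-specific shader lowering passes):
+//
+//   preloadRegister(&MF, TII, PhysReg, VirtReg);
+//
+// If PhysReg is not yet a live-in, it is recorded as a live-in bound to
+// VirtReg and a COPY is emitted at the top of the entry block; otherwise
+// VirtReg is replaced with the virtual register already associated with the
+// preloaded PhysReg.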
diff --git a/lib/Target/AMDIL/AMDGPULowerShaderInstructions.h b/lib/Target/AMDIL/AMDGPULowerShaderInstructions.h
new file mode 100644
index 00000000000..5ee77fafe2b
--- /dev/null
+++ b/lib/Target/AMDIL/AMDGPULowerShaderInstructions.h
@@ -0,0 +1,40 @@
+//===-- AMDGPULowerShaderInstructions.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef AMDGPU_LOWER_SHADER_INSTRUCTIONS
+#define AMDGPU_LOWER_SHADER_INSTRUCTIONS
+
+namespace llvm {
+
+class MachineFunction;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+
+class AMDGPULowerShaderInstructionsPass {
+
+ protected:
+ MachineRegisterInfo * MRI;
+ /**
+ * @param physReg The physical register that will be preloaded.
+ * @param virtReg The virtual register that currently holds the
+ * preloaded value.
+ */
+ void preloadRegister(MachineFunction * MF, const TargetInstrInfo * TII,
+ unsigned physReg, unsigned virtReg) const;
+};
+
+} // end namespace llvm
+
+
+#endif // AMDGPU_LOWER_SHADER_INSTRUCTIONS
diff --git a/lib/Target/AMDIL/AMDGPUReorderPreloadInstructions.cpp b/lib/Target/AMDIL/AMDGPUReorderPreloadInstructions.cpp
new file mode 100644
index 00000000000..c923f19c39f
--- /dev/null
+++ b/lib/Target/AMDIL/AMDGPUReorderPreloadInstructions.cpp
@@ -0,0 +1,66 @@
+//===-- AMDGPUReorderPreloadInstructions.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDIL.h"
+#include "AMDILInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Function.h"
+
+using namespace llvm;
+
+namespace {
+ class AMDGPUReorderPreloadInstructionsPass : public MachineFunctionPass {
+
+ private:
+ static char ID;
+ TargetMachine &TM;
+
+ public:
+ AMDGPUReorderPreloadInstructionsPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TM(tm) { }
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "AMDGPU Reorder Preload Instructions"; }
+ };
+} /* End anonymous namespace */
+
+char AMDGPUReorderPreloadInstructionsPass::ID = 0;
+
+FunctionPass *llvm::createAMDGPUReorderPreloadInstructionsPass(TargetMachine &tm) {
+ return new AMDGPUReorderPreloadInstructionsPass(tm);
+}
+
+/* This pass moves instructions that represent preloaded registers to the
+ * start of the program. */
+bool AMDGPUReorderPreloadInstructionsPass::runOnMachineFunction(MachineFunction &MF)
+{
+ const AMDGPUInstrInfo * TII =
+ static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+ I != MBB.end(); I = Next, Next = llvm::next(I) ) {
+ MachineInstr &MI = *I;
+ if (TII->isRegPreload(MI)) {
+ MF.front().insert(MF.front().begin(), MI.removeFromParent());
+ }
+ }
+ }
+ return false;
+}
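+
+// Sketch of the effect (the instructions shown are illustrative only): given
+//
+//   entry:  %a = ...          ; ordinary code
+//           %b = <preload>    ; TII->isRegPreload(MI) == true
+//
+// the preload is unlinked and re-inserted at the very start of the entry
+// block:
+//
+//   entry:  %b = <preload>
+//           %a = ...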
diff --git a/lib/Target/AMDIL/AMDILAlgorithms.tpp b/lib/Target/AMDIL/AMDILAlgorithms.tpp
new file mode 100644
index 00000000000..058475f0f98
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILAlgorithms.tpp
@@ -0,0 +1,93 @@
+//===------ AMDILAlgorithms.tpp - AMDIL Template Algorithms Header --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides template algorithms that extend the STL algorithms and
+// are useful for the AMDIL backend.
+//
+//===----------------------------------------------------------------------===//
+
+// A template function that loops through the iterators and passes the second
+// argument along with each iterator to the function. In the 'safe' variant
+// below, if the function returns true the current iterator is assumed to have
+// been invalidated, so the loop steps back before advancing to the next
+// iterator; otherwise it simply moves forward. This is based on the STL
+// for_each function, but allows a reference to the second argument.
+template<class InputIterator, class Function, typename Arg>
+Function binaryForEach(InputIterator First, InputIterator Last, Function F,
+ Arg &Second)
+{
+ for ( ; First!=Last; ++First ) {
+ F(*First, Second);
+ }
+ return F;
+}
+
+template<class InputIterator, class Function, typename Arg>
+Function safeBinaryForEach(InputIterator First, InputIterator Last, Function F,
+ Arg &Second)
+{
+ for ( ; First!=Last; ++First ) {
+ if (F(*First, Second)) {
+ --First;
+ }
+ }
+ return F;
+}
+
+// A template function that has two levels of looping before calling the
+// function with the passed-in argument. See binaryForEach for further
+// explanation.
+template<class InputIterator, class Function, typename Arg>
+Function binaryNestedForEach(InputIterator First, InputIterator Last,
+ Function F, Arg &Second)
+{
+ for ( ; First != Last; ++First) {
+ binaryForEach(First->begin(), First->end(), F, Second);
+ }
+ return F;
+}
+template<class InputIterator, class Function, typename Arg>
+Function safeBinaryNestedForEach(InputIterator First, InputIterator Last,
+ Function F, Arg &Second)
+{
+ for ( ; First != Last; ++First) {
+ safeBinaryForEach(First->begin(), First->end(), F, Second);
+ }
+ return F;
+}
+
+// Unlike the STL, a pointer to the iterator itself is passed in with the
+// 'safe' versions of these functions. This allows the function to handle
+// situations such as invalidated iterators.
+template<class InputIterator, class Function>
+Function safeForEach(InputIterator First, InputIterator Last, Function F)
+{
+  for ( ; First != Last; ++First ) {
+    F(&First);
+  }
+ return F;
+}
+
+// A template function that has two levels of looping before calling the
+// function with a pointer to the current iterator. See binaryForEach for
+// further explanation
+template<class InputIterator, class SecondIterator, class Function>
+Function safeNestedForEach(InputIterator First, InputIterator Last,
+ SecondIterator S, Function F)
+{
+ for ( ; First != Last; ++First) {
+ SecondIterator sf, sl;
+ for (sf = First->begin(), sl = First->end();
+ sf != sl; ) {
+ if (!F(&sf)) {
+ ++sf;
+ }
+ }
+ }
+ return F;
+}
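+
+// Example use of binaryForEach (a standalone sketch; the Accumulate functor
+// exists only for this illustration):
+//
+//   struct Accumulate {
+//     void operator()(int Value, int &Sum) const { Sum += Value; }
+//   };
+//
+//   std::vector<int> Values;   // filled elsewhere
+//   int Sum = 0;
+//   binaryForEach(Values.begin(), Values.end(), Accumulate(), Sum);
+//
+// The 'safe' variants differ only in how they cope with invalidated
+// iterators: safeBinaryForEach steps back when the callback returns true,
+// while safeForEach and safeNestedForEach hand the callback a pointer to the
+// iterator itself.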
diff --git a/lib/Target/AMDIL/AMDILCFGStructurizer.cpp b/lib/Target/AMDIL/AMDILCFGStructurizer.cpp
new file mode 100644
index 00000000000..289af6f210e
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILCFGStructurizer.cpp
@@ -0,0 +1,3250 @@
+//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "structcfg"
+#ifdef DEBUG
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME 0
+#endif
+
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define FirstNonDebugInstr(A) A->begin()
+using namespace llvm;
+
+// bixia TODO: move this out to analysis lib. Make this work for both target
+// AMDIL and CBackend.
+// TODO: move-begin.
+
+//===----------------------------------------------------------------------===//
+//
+// Statistics for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+
+STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
+ "matched");
+STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
+ "matched");
+STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
+ "pattern matched");
+STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
+ "pattern matched");
+STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
+ "matched");
+STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
+STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
+
+//===----------------------------------------------------------------------===//
+//
+// Miscellaneous utility for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+namespace llvmCFGStruct
+{
+#define SHOWNEWINSTR(i) \
+ if (DEBUGME) errs() << "New instr: " << *i << "\n"
+
+#define SHOWNEWBLK(b, msg) \
+if (DEBUGME) { \
+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ errs() << "\n"; \
+}
+
+#define SHOWBLK_DETAIL(b, msg) \
+if (DEBUGME) { \
+ if (b) { \
+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ b->print(errs()); \
+ errs() << "\n"; \
+ } \
+}
+
+#define INVALIDSCCNUM -1
+#define INVALIDREGNUM 0
+
+template<class LoopinfoT>
+void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
+ for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
+ iterEnd = LoopInfo.end();
+ iter != iterEnd; ++iter) {
+ (*iter)->print(OS, 0);
+ }
+}
+
+template<class NodeT>
+void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
+ size_t sz = Src.size();
+ for (size_t i = 0; i < sz/2; ++i) {
+ NodeT *t = Src[i];
+ Src[i] = Src[sz - i - 1];
+ Src[sz - i - 1] = t;
+ }
+}
+
+} //end namespace llvmCFGStruct
+
+
+//===----------------------------------------------------------------------===//
+//
+// MachinePostDominatorTree
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+namespace llvm {
+
+/// MachinePostDominatorTree Class - A MachineFunctionPass that wraps a
+/// DominatorTreeBase to compute the post-dominator tree for machine
+/// basic blocks.
+///
+struct MachinePostDominatorTree : public MachineFunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ DominatorTreeBase<MachineBasicBlock> *DT;
+ MachinePostDominatorTree() : MachineFunctionPass(ID)
+ {
+    DT = new DominatorTreeBase<MachineBasicBlock>(true); //true indicates
+                                                          //a postdominator tree
+ }
+
+ ~MachinePostDominatorTree();
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ inline const std::vector<MachineBasicBlock *> &getRoots() const {
+ return DT->getRoots();
+ }
+
+ inline MachineDomTreeNode *getRootNode() const {
+ return DT->getRootNode();
+ }
+
+ inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
+ return DT->getNode(BB);
+ }
+
+ inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
+ return DT->getNode(BB);
+ }
+
+ inline bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const {
+ return DT->dominates(A, B);
+ }
+
+ inline bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
+ return DT->dominates(A, B);
+ }
+
+ inline bool
+ properlyDominates(const MachineDomTreeNode *A, MachineDomTreeNode *B) const {
+ return DT->properlyDominates(A, B);
+ }
+
+ inline bool
+ properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
+ return DT->properlyDominates(A, B);
+ }
+
+ inline MachineBasicBlock *
+ findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B) {
+ return DT->findNearestCommonDominator(A, B);
+ }
+
+ virtual void print(llvm::raw_ostream &OS, const Module *M = 0) const {
+ DT->print(OS);
+ }
+};
+} //end of namespace llvm
+
+char MachinePostDominatorTree::ID = 0;
+static RegisterPass<MachinePostDominatorTree>
+machinePostDominatorTreePass("machinepostdomtree",
+ "MachinePostDominator Tree Construction",
+ true, true);
+
+//const PassInfo *const llvm::MachinePostDominatorsID
+//= &machinePostDominatorTreePass;
+
+bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
+ DT->recalculate(F);
+ //DEBUG(DT->dump());
+ return false;
+}
+
+MachinePostDominatorTree::~MachinePostDominatorTree() {
+ delete DT;
+}
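+
+// Typical use from another MachineFunctionPass (a sketch; it assumes the pass
+// declares AU.addRequired<MachinePostDominatorTree>() in getAnalysisUsage):
+//
+//   MachinePostDominatorTree *PDT = &getAnalysis<MachinePostDominatorTree>();
+//   MachineBasicBlock *CommonPDom = PDT->findNearestCommonDominator(A, B);
+//
+// The CFGStructurizer below retrieves it through
+// CFGTraits::getPostDominatorTree().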
+
+//===----------------------------------------------------------------------===//
+//
+// supporting data structure for CFGStructurizer
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct
+{
+template<class PassT>
+struct CFGStructTraits {
+};
+
+template <class InstrT>
+class BlockInformation {
+public:
+ bool isRetired;
+ int sccNum;
+ //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
+ //Instructions defining the corresponding successor.
+ BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
+};
+
+template <class BlockT, class InstrT, class RegiT>
+class LandInformation {
+public:
+ BlockT *landBlk;
+  std::set<RegiT> breakInitRegs;  //Registers that need a "reg = 0" before
+                                  //WHILELOOP(thisloop), i.e. initialized
+                                  //before entering this loop.
+  std::set<RegiT> contInitRegs;   //Registers that need a "reg = 0" after
+                                  //WHILELOOP(thisloop), i.e. initialized
+                                  //after entering this loop.
+  std::set<RegiT> endbranchInitRegs; //Initialized before entering this loop;
+                                     //at the loop land block the branch
+                                     //condition is on this reg.
+  std::set<RegiT> breakOnRegs;       //Registers that need an "if (reg) break
+                                     //endif" after ENDLOOP(thisloop) to break
+                                     //toward outerLoopOf(thisLoop).
+  std::set<RegiT> contOnRegs;     //Registers that need an "if (reg) continue
+                                  //endif" after ENDLOOP(thisloop) to continue
+                                  //on outerLoopOf(thisLoop).
+ LandInformation() : landBlk(NULL) {}
+};
+
+} //end of namespace llvmCFGStruct
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructurizer
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct
+{
+// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
+template<class PassT>
+class CFGStructurizer
+{
+public:
+ typedef enum {
+ Not_SinglePath = 0,
+ SinglePath_InPath = 1,
+ SinglePath_NotInPath = 2
+ } PathToKind;
+
+public:
+ typedef typename PassT::InstructionType InstrT;
+ typedef typename PassT::FunctionType FuncT;
+ typedef typename PassT::DominatortreeType DomTreeT;
+ typedef typename PassT::PostDominatortreeType PostDomTreeT;
+ typedef typename PassT::DomTreeNodeType DomTreeNodeT;
+ typedef typename PassT::LoopinfoType LoopInfoT;
+
+ typedef GraphTraits<FuncT *> FuncGTraits;
+ //typedef FuncGTraits::nodes_iterator BlockIterator;
+ typedef typename FuncT::iterator BlockIterator;
+
+ typedef typename FuncGTraits::NodeType BlockT;
+ typedef GraphTraits<BlockT *> BlockGTraits;
+ typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
+ //typedef BlockGTraits::succ_iterator InstructionIterator;
+ typedef typename BlockT::iterator InstrIterator;
+
+ typedef CFGStructTraits<PassT> CFGTraits;
+ typedef BlockInformation<InstrT> BlockInfo;
+ typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
+
+ typedef int RegiT;
+ typedef typename PassT::LoopType LoopT;
+ typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
+ typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
+ //landing info for loop break
+ typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
+
+public:
+ CFGStructurizer();
+ ~CFGStructurizer();
+
+ /// Perform the CFG structurization
+ bool run(FuncT &Func, PassT &Pass);
+
+ /// Perform the CFG preparation
+ bool prepare(FuncT &Func, PassT &Pass);
+
+private:
+ void orderBlocks();
+ void printOrderedBlocks(llvm::raw_ostream &OS);
+ int patternMatch(BlockT *CurBlock);
+ int patternMatchGroup(BlockT *CurBlock);
+
+ int serialPatternMatch(BlockT *CurBlock);
+ int ifPatternMatch(BlockT *CurBlock);
+ int switchPatternMatch(BlockT *CurBlock);
+ int loopendPatternMatch(BlockT *CurBlock);
+ int loopPatternMatch(BlockT *CurBlock);
+
+ int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
+ int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
+ //int loopWithoutBreak(BlockT *);
+
+ void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
+ BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
+ void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
+ BlockT *ContBlock, LoopT *contLoop);
+ bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
+ int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock);
+ int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock);
+ int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock, BlockT **LandBlockPtr);
+ void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock, BlockT *LandBlock,
+ bool Detail = false);
+ PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
+ bool AllowSideEntry = true);
+ BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
+ bool AllowSideEntry = true);
+ int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
+ void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
+
+ void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
+ BlockT *TrueBlock, BlockT *FalseBlock,
+ BlockT *LandBlock);
+ void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
+ void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
+ BlockT *ExitLandBlock, RegiT SetReg);
+ void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
+ RegiT SetReg);
+ BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
+ std::set<BlockT*> &ExitBlockSet,
+ BlockT *ExitLandBlk);
+ BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
+ BlockTSmallerVector &ExitingBlocks,
+ BlockTSmallerVector &ExitBlocks);
+ BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
+ void removeUnconditionalBranch(BlockT *SrcBlock);
+ void removeRedundantConditionalBranch(BlockT *SrcBlock);
+ void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
+
+ void removeSuccessor(BlockT *SrcBlock);
+ BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
+ BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
+
+ void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
+ InstrIterator InsertPos);
+
+ void recordSccnum(BlockT *SrcBlock, int SCCNum);
+ int getSCCNum(BlockT *srcBlk);
+
+ void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
+ bool isRetiredBlock(BlockT *SrcBlock);
+ bool isActiveLoophead(BlockT *CurBlock);
+ bool needMigrateBlock(BlockT *Block);
+
+ BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
+ BlockTSmallerVector &exitBlocks,
+ std::set<BlockT*> &ExitBlockSet);
+ void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
+ BlockT *getLoopLandBlock(LoopT *LoopRep);
+ LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
+
+ void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
+
+ bool hasBackEdge(BlockT *curBlock);
+ unsigned getLoopDepth (LoopT *LoopRep);
+ int countActiveBlock(
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
+ BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
+ BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
+
+private:
+ DomTreeT *domTree;
+ PostDomTreeT *postDomTree;
+ LoopInfoT *loopInfo;
+ PassT *passRep;
+ FuncT *funcRep;
+
+ BlockInfoMap blockInfoMap;
+ LoopLandInfoMap loopLandInfoMap;
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
+
+}; //template class CFGStructurizer
+
+template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
+ : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
+}
+
+template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
+ for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
+ E = blockInfoMap.end(); I != E; ++I) {
+ delete I->second;
+ }
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass) {
+ passRep = &pass;
+ funcRep = &func;
+
+ bool changed = false;
+ //func.RenumberBlocks();
+
+ //to do, if not reducible flow graph, make it so ???
+
+ if (DEBUGME) {
+ errs() << "AMDILCFGStructurizer::prepare\n";
+ //func.viewCFG();
+ //func.viewCFGOnly();
+ //func.dump();
+ }
+
+ //FIXME: gcc complains on this.
+ //domTree = &pass.getAnalysis<DomTreeT>();
+ //domTree = CFGTraits::getDominatorTree(pass);
+ //if (DEBUGME) {
+ // domTree->print(errs());
+ //}
+
+ //FIXME: gcc complains on this.
+ //domTree = &pass.getAnalysis<DomTreeT>();
+ //postDomTree = CFGTraits::getPostDominatorTree(pass);
+ //if (DEBUGME) {
+ // postDomTree->print(errs());
+ //}
+
+ //FIXME: gcc complains on this.
+ //loopInfo = &pass.getAnalysis<LoopInfoT>();
+ loopInfo = CFGTraits::getLoopInfo(pass);
+ if (DEBUGME) {
+ errs() << "LoopInfo:\n";
+ PrintLoopinfo(*loopInfo, errs());
+ }
+
+ orderBlocks();
+ if (DEBUGME) {
+ errs() << "Ordered blocks:\n";
+ printOrderedBlocks(errs());
+ }
+
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
+
+ for (typename LoopInfoT::iterator iter = loopInfo->begin(),
+ iterEnd = loopInfo->end();
+ iter != iterEnd; ++iter) {
+ LoopT* loopRep = (*iter);
+ BlockTSmallerVector exitingBlks;
+ loopRep->getExitingBlocks(exitingBlks);
+
+ if (exitingBlks.size() == 0) {
+ BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
+ if (dummyExitBlk != NULL)
+ retBlks.push_back(dummyExitBlk);
+ }
+ }
+
+ // Remove unconditional branch instr.
+ // Add dummy exit block iff there are multiple returns.
+
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
+ iterBlk != iterEndBlk;
+ ++iterBlk) {
+ BlockT *curBlk = *iterBlk;
+ removeUnconditionalBranch(curBlk);
+ removeRedundantConditionalBranch(curBlk);
+ if (CFGTraits::isReturnBlock(curBlk)) {
+ retBlks.push_back(curBlk);
+ }
+ assert(curBlk->succ_size() <= 2);
+ //assert(curBlk->size() > 0);
+ //removeEmptyBlock(curBlk) ??
+ } //for
+
+ if (retBlks.size() >= 2) {
+ addDummyExitBlock(retBlks);
+ changed = true;
+ }
+
+ return changed;
+} //CFGStructurizer::prepare
+
+template<class PassT>
+bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass) {
+ passRep = &pass;
+ funcRep = &func;
+
+ //func.RenumberBlocks();
+
+ //Assume reducible CFG...
+ if (DEBUGME) {
+ errs() << "AMDILCFGStructurizer::run\n";
+ //errs() << func.getFunction()->getNameStr() << "\n";
+ func.viewCFG();
+ //func.viewCFGOnly();
+ //func.dump();
+ }
+
+#if 1
+ //FIXME: gcc complains on this.
+ //domTree = &pass.getAnalysis<DomTreeT>();
+ domTree = CFGTraits::getDominatorTree(pass);
+ if (DEBUGME) {
+ domTree->print(errs(), (const llvm::Module*)0);
+ }
+#endif
+
+ //FIXME: gcc complains on this.
+ //domTree = &pass.getAnalysis<DomTreeT>();
+ postDomTree = CFGTraits::getPostDominatorTree(pass);
+ if (DEBUGME) {
+ postDomTree->print(errs());
+ }
+
+ //FIXME: gcc complains on this.
+ //loopInfo = &pass.getAnalysis<LoopInfoT>();
+ loopInfo = CFGTraits::getLoopInfo(pass);
+ if (DEBUGME) {
+ errs() << "LoopInfo:\n";
+ PrintLoopinfo(*loopInfo, errs());
+ }
+
+ orderBlocks();
+//#define STRESSTEST
+#ifdef STRESSTEST
+  //Use the worst block ordering to test the algorithm.
+ ReverseVector(orderedBlks);
+#endif
+
+ if (DEBUGME) {
+ errs() << "Ordered blocks:\n";
+ printOrderedBlocks(errs());
+ }
+ int numIter = 0;
+ bool finish = false;
+ BlockT *curBlk;
+ bool makeProgress = false;
+ int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
+ orderedBlks.end());
+
+ do {
+ ++numIter;
+ if (DEBUGME) {
+ errs() << "numIter = " << numIter
+             << ", numRemainedBlk = " << numRemainedBlk << "\n";
+ }
+
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin();
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlkEnd = orderedBlks.end();
+
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ sccBeginIter = iterBlk;
+ BlockT *sccBeginBlk = NULL;
+ int sccNumBlk = 0; // The number of active blocks, init to a
+ // maximum possible number.
+ int sccNumIter; // Number of iteration in this SCC.
+
+ while (iterBlk != iterBlkEnd) {
+ curBlk = *iterBlk;
+
+ if (sccBeginBlk == NULL) {
+ sccBeginIter = iterBlk;
+ sccBeginBlk = curBlk;
+ sccNumIter = 0;
+ sccNumBlk = numRemainedBlk; // Init to maximum possible number.
+ if (DEBUGME) {
+ errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
+ errs() << "\n";
+ }
+ }
+
+ if (!isRetiredBlock(curBlk)) {
+ patternMatch(curBlk);
+ }
+
+ ++iterBlk;
+
+ bool contNextScc = true;
+ if (iterBlk == iterBlkEnd
+ || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
+ // Just finish one scc.
+ ++sccNumIter;
+ int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
+ if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
+ if (DEBUGME) {
+ errs() << "Can't reduce SCC " << getSCCNum(curBlk)
+ << ", sccNumIter = " << sccNumIter;
+ errs() << "doesn't make any progress\n";
+ }
+ contNextScc = true;
+ } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
+ sccNumBlk = sccRemainedNumBlk;
+ iterBlk = sccBeginIter;
+ contNextScc = false;
+ if (DEBUGME) {
+ errs() << "repeat processing SCC" << getSCCNum(curBlk)
+ << "sccNumIter = " << sccNumIter << "\n";
+ func.viewCFG();
+ //func.viewCFGOnly();
+ }
+ } else {
+ // Finish the current scc.
+ contNextScc = true;
+ }
+ } else {
+ // Continue on next component in the current scc.
+ contNextScc = false;
+ }
+
+ if (contNextScc) {
+ sccBeginBlk = NULL;
+ }
+ } //while, "one iteration" over the function.
+
+ BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
+ if (entryBlk->succ_size() == 0) {
+ finish = true;
+ if (DEBUGME) {
+ errs() << "Reduce to one block\n";
+ }
+ } else {
+ int newnumRemainedBlk
+ = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
+ // consider cloned blocks ??
+ if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
+ makeProgress = true;
+ numRemainedBlk = newnumRemainedBlk;
+ } else {
+ makeProgress = false;
+ if (DEBUGME) {
+ errs() << "No progress\n";
+ }
+ }
+ }
+ } while (!finish && makeProgress);
+
+ // Misc wrap up to maintain the consistency of the Function representation.
+ CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
+
+ // Detach retired Block, release memory.
+ for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
+ iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
+ if ((*iterMap).second && (*iterMap).second->isRetired) {
+ assert(((*iterMap).first)->getNumber() != -1);
+ if (DEBUGME) {
+ errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
+ }
+ (*iterMap).first->eraseFromParent(); //Remove from the parent Function.
+ }
+ delete (*iterMap).second;
+ }
+ blockInfoMap.clear();
+
+ // clear loopLandInfoMap
+ for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
+ iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
+ delete (*iterMap).second;
+ }
+ loopLandInfoMap.clear();
+
+ if (DEBUGME) {
+ func.viewCFG();
+ //func.dump();
+ }
+
+ if (!finish) {
+    assert(!"IRREDUCIBLE_CF");
+ }
+
+ return true;
+} //CFGStructurizer::run
+
+/// Print the ordered Blocks.
+///
+template<class PassT>
+void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
+ size_t i = 0;
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
+ iterBlk != iterBlkEnd;
+ ++iterBlk, ++i) {
+ os << "BB" << (*iterBlk)->getNumber();
+ os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
+ if (i != 0 && i % 10 == 0) {
+ os << "\n";
+ } else {
+ os << " ";
+ }
+ }
+} //printOrderedBlocks
+
+/// Compute the reversed DFS post order of Blocks
+///
+template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
+ int sccNum = 0;
+ BlockT *bb;
+ for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
+ sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
+ std::vector<BlockT *> &sccNext = *sccIter;
+ for (typename std::vector<BlockT *>::const_iterator
+ blockIter = sccNext.begin(), blockEnd = sccNext.end();
+ blockIter != blockEnd; ++blockIter) {
+ bb = *blockIter;
+ orderedBlks.push_back(bb);
+ recordSccnum(bb, sccNum);
+ }
+ }
+
+  //walk through all the blocks in func to check for unreachable blocks
+ for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
+ blockEnd1 = FuncGTraits::nodes_end(funcRep);
+ blockIter1 != blockEnd1; ++blockIter1) {
+ BlockT *bb = &(*blockIter1);
+ sccNum = getSCCNum(bb);
+ if (sccNum == INVALIDSCCNUM) {
+ errs() << "unreachable block BB" << bb->getNumber() << "\n";
+ }
+ } //end of for
+} //orderBlocks
+
+template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
+ int numMatch = 0;
+ int curMatch;
+
+ if (DEBUGME) {
+ errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
+ }
+
+ while ((curMatch = patternMatchGroup(curBlk)) > 0) {
+ numMatch += curMatch;
+ }
+
+ if (DEBUGME) {
+ errs() << "End patternMatch BB" << curBlk->getNumber()
+ << ", numMatch = " << numMatch << "\n";
+ }
+
+ return numMatch;
+} //patternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
+ int numMatch = 0;
+ numMatch += serialPatternMatch(curBlk);
+ numMatch += ifPatternMatch(curBlk);
+ //numMatch += switchPatternMatch(curBlk);
+ numMatch += loopendPatternMatch(curBlk);
+ numMatch += loopPatternMatch(curBlk);
+ return numMatch;
+}//patternMatchGroup
+
+template<class PassT>
+int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
+ if (curBlk->succ_size() != 1) {
+ return 0;
+ }
+
+ BlockT *childBlk = *curBlk->succ_begin();
+ if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
+ return 0;
+ }
+
+ mergeSerialBlock(curBlk, childBlk);
+ ++numSerialPatternMatch;
+ return 1;
+} //serialPatternMatch
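+
+// The serial pattern in picture form (block numbers are illustrative):
+//
+//   before:   BB1 --> BB2 --> ...      (BB2 has exactly one predecessor)
+//   after:    BB1  --> ...             (BB2's instructions spliced onto the
+//                                       end of BB1, BB2 retired)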
+
+template<class PassT>
+int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
+ //two edges
+ if (curBlk->succ_size() != 2) {
+ return 0;
+ }
+
+ if (hasBackEdge(curBlk)) {
+ return 0;
+ }
+
+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
+ if (branchInstr == NULL) {
+ return 0;
+ }
+
+ assert(CFGTraits::isCondBranch(branchInstr));
+
+ BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
+ BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
+ BlockT *landBlk;
+ int cloned = 0;
+
+ // TODO: Simplify
+ if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
+ && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
+ landBlk = *trueBlk->succ_begin();
+ } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
+ landBlk = NULL;
+ } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
+ landBlk = falseBlk;
+ falseBlk = NULL;
+ } else if (falseBlk->succ_size() == 1
+ && *falseBlk->succ_begin() == trueBlk) {
+ landBlk = trueBlk;
+ trueBlk = NULL;
+ } else if (falseBlk->succ_size() == 1
+ && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
+ landBlk = *falseBlk->succ_begin();
+ } else if (trueBlk->succ_size() == 1
+ && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
+ landBlk = *trueBlk->succ_begin();
+ } else {
+ return handleJumpintoIf(curBlk, trueBlk, falseBlk);
+ }
+
+  // improveSimpleJumpintoIf can handle the case where landBlk == NULL, but the
+  // new BB created for landBlk == NULL may introduce new challenges to the
+  // reduction process.
+ if (landBlk != NULL &&
+ ((trueBlk && trueBlk->pred_size() > 1)
+ || (falseBlk && falseBlk->pred_size() > 1))) {
+ cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
+ }
+
+ if (trueBlk && trueBlk->pred_size() > 1) {
+ trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
+ ++cloned;
+ }
+
+ if (falseBlk && falseBlk->pred_size() > 1) {
+ falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
+ ++cloned;
+ }
+
+ mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
+
+ ++numIfPatternMatch;
+
+ numClonedBlock += cloned;
+
+ return 1 + cloned;
+} //ifPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
+ return 0;
+} //switchPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ typename std::vector<LoopT *> nestedLoops;
+ while (loopRep) {
+ nestedLoops.push_back(loopRep);
+ loopRep = loopRep->getParentLoop();
+ }
+
+ if (nestedLoops.size() == 0) {
+ return 0;
+ }
+
+  // Process nested loops outside->inside, so a "continue" to an outside loop
+  // won't be mistaken for a "break" of the current loop.
+ int num = 0;
+ for (typename std::vector<LoopT *>::reverse_iterator
+ iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
+ iter != iterEnd; ++iter) {
+ loopRep = *iter;
+
+ if (getLoopLandBlock(loopRep) != NULL) {
+ continue;
+ }
+
+ BlockT *loopHeader = loopRep->getHeader();
+
+ int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
+
+ if (numBreak == -1) {
+ break;
+ }
+
+ int numCont = loopcontPatternMatch(loopRep, loopHeader);
+ num += numBreak + numCont;
+ }
+
+ return num;
+} //loopendPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
+ if (curBlk->succ_size() != 0) {
+ return 0;
+ }
+
+ int numLoop = 0;
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ while (loopRep && loopRep->getHeader() == curBlk) {
+ LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
+ if (loopLand) {
+ BlockT *landBlk = loopLand->landBlk;
+ assert(landBlk);
+ if (!isRetiredBlock(landBlk)) {
+ mergeLooplandBlock(curBlk, loopLand);
+ ++numLoop;
+ }
+ }
+ loopRep = loopRep->getParentLoop();
+ }
+
+ numLoopPatternMatch += numLoop;
+
+ return numLoop;
+} //loopPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
+ BlockT *loopHeader) {
+ BlockTSmallerVector exitingBlks;
+ loopRep->getExitingBlocks(exitingBlks);
+
+ if (DEBUGME) {
+ errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
+ }
+
+ if (exitingBlks.size() == 0) {
+ setLoopLandBlock(loopRep);
+ return 0;
+ }
+
+ // Compute the corresponding exitBlks and exit block set.
+ BlockTSmallerVector exitBlks;
+ std::set<BlockT *> exitBlkSet;
+ for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
+ iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *exitingBlk = *iter;
+ BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
+ exitBlks.push_back(exitBlk);
+ exitBlkSet.insert(exitBlk); //non-duplicate insert
+ }
+
+ assert(exitBlkSet.size() > 0);
+ assert(exitBlks.size() == exitingBlks.size());
+
+ if (DEBUGME) {
+ errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
+ }
+
+ // Find exitLandBlk.
+ BlockT *exitLandBlk = NULL;
+ int numCloned = 0;
+ int numSerial = 0;
+
+ if (exitBlkSet.size() == 1)
+ {
+ exitLandBlk = *exitBlkSet.begin();
+ } else {
+ exitLandBlk = findNearestCommonPostDom(exitBlkSet);
+
+ if (exitLandBlk == NULL) {
+ return -1;
+ }
+
+ bool allInPath = true;
+ bool allNotInPath = true;
+ for (typename std::set<BlockT*>::const_iterator
+ iter = exitBlkSet.begin(),
+ iterEnd = exitBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *exitBlk = *iter;
+
+ PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
+ if (DEBUGME) {
+ errs() << "BB" << exitBlk->getNumber()
+ << " to BB" << exitLandBlk->getNumber() << " PathToKind="
+ << pathKind << "\n";
+ }
+
+ allInPath = allInPath && (pathKind == SinglePath_InPath);
+ allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
+
+ if (!allInPath && !allNotInPath) {
+ if (DEBUGME) {
+ errs() << "singlePath check fail\n";
+ }
+ return -1;
+ }
+ } // check all exit blocks
+
+ if (allNotInPath) {
+#if 1
+
+ // TODO: Simplify, maybe separate function?
+ //funcRep->viewCFG();
+ LoopT *parentLoopRep = loopRep->getParentLoop();
+ BlockT *parentLoopHeader = NULL;
+ if (parentLoopRep)
+ parentLoopHeader = parentLoopRep->getHeader();
+
+ if (exitLandBlk == parentLoopHeader &&
+ (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
+ loopRep,
+ exitBlkSet,
+ exitLandBlk)) != NULL) {
+ if (DEBUGME) {
+ errs() << "relocateLoopcontBlock success\n";
+ }
+ } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
+ exitingBlks,
+ exitBlks)) != NULL) {
+ if (DEBUGME) {
+ errs() << "insertEndbranchBlock success\n";
+ }
+ } else {
+ if (DEBUGME) {
+ errs() << "loop exit fail\n";
+ }
+ return -1;
+ }
+#else
+ return -1;
+#endif
+ }
+
+ // Handle side entry to exit path.
+ exitBlks.clear();
+ exitBlkSet.clear();
+ for (typename BlockTSmallerVector::iterator iterExiting =
+ exitingBlks.begin(),
+ iterExitingEnd = exitingBlks.end();
+ iterExiting != iterExitingEnd; ++iterExiting) {
+ BlockT *exitingBlk = *iterExiting;
+ BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
+ BlockT *newExitBlk = exitBlk;
+
+ if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
+ newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
+ ++numCloned;
+ }
+
+ numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
+
+ exitBlks.push_back(newExitBlk);
+ exitBlkSet.insert(newExitBlk);
+ }
+
+ for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
+ iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit) {
+ BlockT *exitBlk = *iterExit;
+ numSerial += serialPatternMatch(exitBlk);
+ }
+
+ for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
+ iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit) {
+ BlockT *exitBlk = *iterExit;
+ if (exitBlk->pred_size() > 1) {
+ if (exitBlk != exitLandBlk) {
+ return -1;
+ }
+ } else {
+ if (exitBlk != exitLandBlk &&
+ (exitBlk->succ_size() != 1 ||
+ *exitBlk->succ_begin() != exitLandBlk)) {
+ return -1;
+ }
+ }
+ }
+ } // else
+
+ // LoopT *exitLandLoop = loopInfo->getLoopFor(exitLandBlk);
+ exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
+
+ // Fold break into the breaking block. Leverage across level breaks.
+ assert(exitingBlks.size() == exitBlks.size());
+ for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
+ iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
+ BlockT *exitBlk = *iterExit;
+ BlockT *exitingBlk = *iterExiting;
+ assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
+ LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
+ handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
+ }
+
+ int numBreak = static_cast<int>(exitingBlks.size());
+ numLoopbreakPatternMatch += numBreak;
+ numClonedBlock += numCloned;
+ return numBreak + numSerial + numCloned;
+} //loopbreakPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
+ BlockT *loopHeader) {
+ int numCont = 0;
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
+ for (typename InvBlockGTraits::ChildIteratorType iter =
+ InvBlockGTraits::child_begin(loopHeader),
+ iterEnd = InvBlockGTraits::child_end(loopHeader);
+ iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ if (loopRep->contains(curBlk)) {
+ handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
+ loopHeader, loopRep);
+ contBlk.push_back(curBlk);
+ ++numCont;
+ }
+ }
+
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator
+ iter = contBlk.begin(), iterEnd = contBlk.end();
+ iter != iterEnd; ++iter) {
+ (*iter)->removeSuccessor(loopHeader);
+ }
+
+ numLoopcontPatternMatch += numCont;
+
+ return numCont;
+} //loopcontPatternMatch
+
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
+ BlockT *src2Blk) {
+  // Return true iff src1Blk->succ_size() == 0 and src1Blk and src2Blk are in
+  // the same loop that already has LoopLandInfo. Without explicitly keeping
+  // track of loopContBlks and loopBreakBlks, this is a way to recover that
+  // information.
+ //
+ if (src1Blk->succ_size() == 0) {
+ LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
+ if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+ if (theEntry != NULL) {
+ if (DEBUGME) {
+ errs() << "isLoopContBreakBlock yes src1 = BB"
+ << src1Blk->getNumber()
+ << " src2 = BB" << src2Blk->getNumber() << "\n";
+ }
+ return true;
+ }
+ }
+ }
+ return false;
+} //isSameloopDetachedContbreak
+
+template<class PassT>
+int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk) {
+ int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
+ if (num == 0) {
+ if (DEBUGME) {
+ errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
+ }
+ num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
+ }
+ return num;
+}
+
+template<class PassT>
+int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk) {
+ int num = 0;
+ BlockT *downBlk;
+
+ //trueBlk could be the common post dominator
+ downBlk = trueBlk;
+
+ if (DEBUGME) {
+ errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
+ << " true = BB" << trueBlk->getNumber()
+ << ", numSucc=" << trueBlk->succ_size()
+ << " false = BB" << falseBlk->getNumber() << "\n";
+ }
+
+ while (downBlk) {
+ if (DEBUGME) {
+ errs() << "check down = BB" << downBlk->getNumber();
+ }
+
+ if (//postDomTree->dominates(downBlk, falseBlk) &&
+ singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
+ if (DEBUGME) {
+ errs() << " working\n";
+ }
+
+ num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
+ num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
+
+ numClonedBlock += num;
+ num += serialPatternMatch(*headBlk->succ_begin());
+ num += serialPatternMatch(*(++headBlk->succ_begin()));
+ num += ifPatternMatch(headBlk);
+ assert(num > 0); //
+
+ break;
+ }
+ if (DEBUGME) {
+ errs() << " not working\n";
+ }
+ downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
+ } // walk down the postDomTree
+
+ return num;
+} //handleJumpintoIf
+
+template<class PassT>
+void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT *landBlk,
+ bool detail) {
+ errs() << "head = BB" << headBlk->getNumber()
+ << " size = " << headBlk->size();
+ if (detail) {
+ errs() << "\n";
+ headBlk->print(errs());
+ errs() << "\n";
+ }
+
+ if (trueBlk) {
+ errs() << ", true = BB" << trueBlk->getNumber() << " size = "
+ << trueBlk->size() << " numPred = " << trueBlk->pred_size();
+ if (detail) {
+ errs() << "\n";
+ trueBlk->print(errs());
+ errs() << "\n";
+ }
+ }
+ if (falseBlk) {
+ errs() << ", false = BB" << falseBlk->getNumber() << " size = "
+ << falseBlk->size() << " numPred = " << falseBlk->pred_size();
+ if (detail) {
+ errs() << "\n";
+ falseBlk->print(errs());
+ errs() << "\n";
+ }
+ }
+ if (landBlk) {
+ errs() << ", land = BB" << landBlk->getNumber() << " size = "
+ << landBlk->size() << " numPred = " << landBlk->pred_size();
+ if (detail) {
+ errs() << "\n";
+ landBlk->print(errs());
+ errs() << "\n";
+ }
+ }
+
+ errs() << "\n";
+} //showImproveSimpleJumpintoIf
+
+template<class PassT>
+int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT **plandBlk) {
+ bool migrateTrue = false;
+ bool migrateFalse = false;
+
+ BlockT *landBlk = *plandBlk;
+
+ assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
+ && (falseBlk == NULL || falseBlk->succ_size() <= 1));
+
+ if (trueBlk == falseBlk) {
+ return 0;
+ }
+
+#if 0
+ if (DEBUGME) {
+ errs() << "improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+ }
+#endif
+
+ // unsigned landPredSize = landBlk ? landBlk->pred_size() : 0;
+  // We may want to consider landBlk->pred_size(), as it represents the number
+  // of "initReg = ..." assignments that need to be inserted.
+ migrateTrue = needMigrateBlock(trueBlk);
+ migrateFalse = needMigrateBlock(falseBlk);
+
+ if (!migrateTrue && !migrateFalse) {
+ return 0;
+ }
+
+  // If we need to migrate either trueBlk or falseBlk, also migrate whichever
+  // of them has more than one predecessor. Without doing this, a predecessor
+  // other than headBlk would leave an undefined value in initReg.
+ if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
+ migrateTrue = true;
+ }
+ if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
+ migrateFalse = true;
+ }
+
+ if (DEBUGME) {
+ errs() << "before improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+ //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
+ }
+
+ // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
+ //
+ // new: headBlk => if () {initReg = 1; org trueBlk branch} else
+ // {initReg = 0; org falseBlk branch }
+ // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
+ // => org landBlk
+  // if landBlk->pred_size() > 2, put the above if-else inside
+ // if (initReg !=2) {...}
+ //
+ // add initReg = initVal to headBlk
+ unsigned initReg =
+ funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass);
+ if (!migrateTrue || !migrateFalse) {
+ int initVal = migrateTrue ? 0 : 1;
+ CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
+ }
+
+ int numNewBlk = 0;
+
+ if (landBlk == NULL) {
+ landBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(landBlk); //insert to function
+
+ if (trueBlk) {
+ trueBlk->addSuccessor(landBlk);
+ } else {
+ headBlk->addSuccessor(landBlk);
+ }
+
+ if (falseBlk) {
+ falseBlk->addSuccessor(landBlk);
+ } else {
+ headBlk->addSuccessor(landBlk);
+ }
+
+ numNewBlk ++;
+ }
+
+ bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
+
+ //insert AMDIL::ENDIF to avoid special case "input landBlk == NULL"
+ typename BlockT::iterator insertPos =
+ CFGTraits::getInstrPos
+ (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDIL::ENDIF, passRep));
+
+ if (landBlkHasOtherPred) {
+ unsigned immReg =
+ funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass);
+ CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
+ unsigned cmpResReg =
+ funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass);
+
+ CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
+ initReg, immReg);
+ CFGTraits::insertCondBranchBefore(landBlk, insertPos,
+ AMDIL::IF_LOGICALZ_i32, passRep,
+ cmpResReg, DebugLoc());
+ }
+
+ CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDIL::IF_LOGICALNZ_i32,
+ passRep, initReg, DebugLoc());
+
+ if (migrateTrue) {
+ migrateInstruction(trueBlk, landBlk, insertPos);
+    // Need to unconditionally insert the assignment to ensure that a path
+    // from a predecessor other than headBlk has a valid value in initReg if
+    // (initVal != 1).
+ CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
+ }
+ CFGTraits::insertInstrBefore(insertPos, AMDIL::ELSE, passRep);
+
+ if (migrateFalse) {
+ migrateInstruction(falseBlk, landBlk, insertPos);
+    // Need to unconditionally insert the assignment to ensure that a path
+    // from a predecessor other than headBlk has a valid value in initReg if
+    // (initVal != 0).
+ CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
+ }
+ //CFGTraits::insertInstrBefore(insertPos, AMDIL::ENDIF, passRep);
+
+ if (landBlkHasOtherPred) {
+ // add endif
+ CFGTraits::insertInstrBefore(insertPos, AMDIL::ENDIF, passRep);
+
+ // put initReg = 2 to other predecessors of landBlk
+ for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
+ predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
+ ++predIter) {
+ BlockT *curBlk = *predIter;
+ if (curBlk != trueBlk && curBlk != falseBlk) {
+ CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
+ }
+ } //for
+ }
+ if (DEBUGME) {
+ errs() << "result from improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+ //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
+ }
+
+ // update landBlk
+ *plandBlk = landBlk;
+
+ return numNewBlk;
+} //improveSimpleJumpintoIf
+
+template<class PassT>
+void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
+ LoopT *exitingLoop,
+ BlockT *exitBlk,
+ LoopT *exitLoop,
+ BlockT *landBlk) {
+ if (DEBUGME) {
+ errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
+ << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
+ }
+
+ RegiT initReg = INVALIDREGNUM;
+ if (exitingLoop != exitLoop) {
+ initReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass));
+ assert(initReg != INVALIDREGNUM);
+ addLoopBreakInitReg(exitLoop, initReg);
+ while (exitingLoop != exitLoop && exitingLoop) {
+ addLoopBreakOnReg(exitingLoop, initReg);
+ exitingLoop = exitingLoop->getParentLoop();
+ }
+ assert(exitingLoop == exitLoop);
+ }
+
+ mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
+
+} //handleLoopbreak
+
+template<class PassT>
+void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
+ LoopT *contingLoop,
+ BlockT *contBlk,
+ LoopT *contLoop) {
+ if (DEBUGME) {
+ errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
+ << " header = BB" << contBlk->getNumber() << "\n";
+
+ errs() << "Trying to continue loop-depth = "
+ << getLoopDepth(contLoop)
+ << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
+ }
+
+ RegiT initReg = INVALIDREGNUM;
+ if (contingLoop != contLoop) {
+ initReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass));
+ assert(initReg != INVALIDREGNUM);
+ addLoopContInitReg(contLoop, initReg);
+ while (contingLoop && contingLoop->getParentLoop() != contLoop) {
+ addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg
+ contingLoop = contingLoop->getParentLoop();
+ }
+ assert(contingLoop && contingLoop->getParentLoop() == contLoop);
+ addLoopContOnReg(contingLoop, initReg);
+ }
+
+ settleLoopcontBlock(contingBlk, contBlk, initReg);
+ //contingBlk->removeSuccessor(loopHeader);
+} //handleLoopcontBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
+ if (DEBUGME) {
+ errs() << "serialPattern BB" << dstBlk->getNumber()
+ << " <= BB" << srcBlk->getNumber() << "\n";
+ }
+ //removeUnconditionalBranch(dstBlk);
+ dstBlk->splice(dstBlk->end(), srcBlk, FirstNonDebugInstr(srcBlk), srcBlk->end());
+
+ dstBlk->removeSuccessor(srcBlk);
+ CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
+
+ removeSuccessor(srcBlk);
+ retireBlock(dstBlk, srcBlk);
+} //mergeSerialBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
+ BlockT *curBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT *landBlk) {
+ if (DEBUGME) {
+ errs() << "ifPattern BB" << curBlk->getNumber();
+ errs() << "{ ";
+ if (trueBlk) {
+ errs() << "BB" << trueBlk->getNumber();
+ }
+ errs() << " } else ";
+ errs() << "{ ";
+ if (falseBlk) {
+ errs() << "BB" << falseBlk->getNumber();
+ }
+ errs() << " }\n ";
+ errs() << "landBlock: ";
+ if (landBlk == NULL) {
+ errs() << "NULL";
+ } else {
+ errs() << "BB" << landBlk->getNumber();
+ }
+ errs() << "\n";
+ }
+
+ int oldOpcode = branchInstr->getOpcode();
+ DebugLoc branchDL = branchInstr->getDebugLoc();
+
+// transform to
+// if cond
+// trueBlk
+// else
+// falseBlk
+// endif
+// landBlk
+
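+// An illustrative sketch (assuming both trueBlk and falseBlk are present):
+// after the splices below, curBlk roughly contains
+//   <instructions of curBlk before the branch>
+//   IF_LOGICALNZ cond
+//   <trueBlk instructions>
+//   ELSE
+//   <falseBlk instructions>
+//   ENDIF
+// and landBlk, if any, remains a separate successor of curBlk.
+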
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(curBlk, branchInstr);
+ CFGTraits::insertCondBranchBefore(branchInstrPos,
+ CFGTraits::getBranchNzeroOpcode(oldOpcode),
+ passRep,
+ branchDL);
+
+ if (trueBlk) {
+ curBlk->splice(branchInstrPos, trueBlk, FirstNonDebugInstr(trueBlk), trueBlk->end());
+ curBlk->removeSuccessor(trueBlk);
+ if (landBlk && trueBlk->succ_size()!=0) {
+ trueBlk->removeSuccessor(landBlk);
+ }
+ retireBlock(curBlk, trueBlk);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::ELSE, passRep);
+
+ if (falseBlk) {
+ curBlk->splice(branchInstrPos, falseBlk, FirstNonDebugInstr(falseBlk),
+ falseBlk->end());
+ curBlk->removeSuccessor(falseBlk);
+ if (landBlk && falseBlk->succ_size() != 0) {
+ falseBlk->removeSuccessor(landBlk);
+ }
+ retireBlock(curBlk, falseBlk);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::ENDIF, passRep);
+
+ //curBlk->remove(branchInstrPos);
+ branchInstr->eraseFromParent();
+
+ if (landBlk && trueBlk && falseBlk) {
+ curBlk->addSuccessor(landBlk);
+ }
+
+} //mergeIfthenelseBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
+ LoopLandInfo *loopLand) {
+ BlockT *landBlk = loopLand->landBlk;
+
+ if (DEBUGME) {
+ errs() << "loopPattern header = BB" << dstBlk->getNumber()
+ << " land = BB" << landBlk->getNumber() << "\n";
+ }
+
+ // Loop contInitRegs are init at the beginning of the loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->contInitRegs.begin(),
+ iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+ }
+
+  /* We last inserted the DebugLoc in the BREAK_LOGICALZ_i32 or
+   * AMDIL::BREAK_LOGICALNZ_i32 statement in the current dstBlk.
+   * Search for the DebugLoc in that statement.
+   * If it is not found, insert the empty/default DebugLoc. */
+ InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
+ DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
+
+ CFGTraits::insertInstrBefore(dstBlk, AMDIL::WHILELOOP, passRep, DLBreak);
+ // Loop breakInitRegs are init before entering the loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->breakInitRegs.begin(),
+ iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter)
+ {
+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+ }
+ // Loop endbranchInitRegs are init before entering the loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->endbranchInitRegs.begin(),
+ iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+ }
+
+  /* We last inserted the DebugLoc in the continue statement in the current
+   * dstBlk. Search for the DebugLoc in the continue statement.
+   * If it is not found, insert the empty/default DebugLoc. */
+ InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
+ DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
+
+ CFGTraits::insertInstrEnd(dstBlk, AMDIL::ENDLOOP, passRep, DLContinue);
+  // Loop breakOnRegs are checked after the ENDLOOP: break the loop enclosing
+  // this loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->breakOnRegs.begin(),
+ iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertCondBranchEnd(dstBlk, AMDIL::BREAK_LOGICALNZ_i32, passRep,
+ *iter);
+ }
+
+  // Loop contOnRegs are checked after the ENDLOOP: continue the loop enclosing
+  // this loop.
+ for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
+ iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertCondBranchEnd(dstBlk, AMDIL::CONTINUE_LOGICALNZ_i32,
+ passRep, *iter);
+ }
+
+ dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
+
+ for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
+ iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
+ dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of.
+ }
+
+ removeSuccessor(landBlk);
+ retireBlock(dstBlk, landBlk);
+} //mergeLooplandBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
+ BlockT *exitBlk,
+ BlockT *exitLandBlk,
+ RegiT setReg) {
+ if (DEBUGME) {
+ errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
+ << " exit = BB" << exitBlk->getNumber()
+ << " land = BB" << exitLandBlk->getNumber() << "\n";
+ }
+
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
+ assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
+
+ DebugLoc DL = branchInstr->getDebugLoc();
+
+ BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
+ int oldOpcode = branchInstr->getOpcode();
+
+ // transform exitingBlk to
+ // if ( ) {
+ // exitBlk (if exitBlk != exitLandBlk)
+ // setReg = 1
+ // break
+ // }endif
+ // successor = {orgSuccessor(exitingBlk) - exitBlk}
+
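+  // An illustrative sketch of the two possible results (depending on whether
+  // the plain break form applies):
+  //   (a) exitBlk == exitLandBlk and setReg == INVALIDREGNUM:
+  //         BREAK_LOGICALNZ cond          (or BREAK_LOGICALZ)
+  //   (b) otherwise:
+  //         IF_LOGICALNZ cond             (or IF_LOGICALZ)
+  //           <exitBlk instructions>      (only if exitBlk != exitLandBlk)
+  //           setReg = 1                  (only if setReg != INVALIDREGNUM)
+  //           BREAK
+  //         ENDIF
+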
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(exitingBlk, branchInstr);
+
+ if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
+ //break_logical
+ int newOpcode =
+ (trueBranch == exitBlk) ? CFGTraits::getBreakNzeroOpcode(oldOpcode)
+ : CFGTraits::getBreakZeroOpcode(oldOpcode);
+ CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
+ } else {
+ int newOpcode =
+ (trueBranch == exitBlk) ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
+ : CFGTraits::getBranchZeroOpcode(oldOpcode);
+ CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
+ if (exitBlk != exitLandBlk) {
+ //splice is insert-before ...
+ exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
+ exitBlk->end());
+ }
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::BREAK, passRep);
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::ENDIF, passRep);
+ } //if_logical
+
+  //now branchInstr can be erased safely
+ //exitingBlk->eraseFromParent(branchInstr);
+ branchInstr->eraseFromParent();
+
+ //now take care of successors, retire blocks
+ exitingBlk->removeSuccessor(exitBlk);
+ if (exitBlk != exitLandBlk) {
+ //splice is insert-before ...
+ exitBlk->removeSuccessor(exitLandBlk);
+ retireBlock(exitingBlk, exitBlk);
+ }
+
+} //mergeLoopbreakBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
+ BlockT *contBlk,
+ RegiT setReg) {
+ if (DEBUGME) {
+ errs() << "settleLoopcontBlock conting = BB"
+ << contingBlk->getNumber()
+ << ", cont = BB" << contBlk->getNumber() << "\n";
+ }
+
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
+ if (branchInstr) {
+ assert(CFGTraits::isCondBranch(branchInstr));
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(contingBlk, branchInstr);
+ BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
+ int oldOpcode = branchInstr->getOpcode();
+ DebugLoc DL = branchInstr->getDebugLoc();
+
+ // transform contingBlk to
+ // if () {
+ // move instr after branchInstr
+ // continue
+ // or
+ // setReg = 1
+ // break
+ // }endif
+ // successor = {orgSuccessor(contingBlk) - loopHeader}
+
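+    // An illustrative sketch: when the branch is the last instruction and no
+    // setReg is needed, it is simply rewritten as
+    //   CONTINUE_LOGICALNZ cond           (or CONTINUE_LOGICALZ)
+    // otherwise contingBlk roughly becomes
+    //   IF_LOGICALNZ cond                 (or IF_LOGICALZ)
+    //     setReg = 1                      (only if setReg != INVALIDREGNUM)
+    //     <phi-moves, if any>
+    //     BREAK                           (or CONTINUE when setReg is invalid)
+    //   ENDIF
+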
+ bool useContinueLogical =
+ (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
+
+ if (useContinueLogical == false)
+ {
+ int branchOpcode =
+ trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
+ : CFGTraits::getBranchZeroOpcode(oldOpcode);
+
+ CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
+
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDIL::BREAK, passRep, DL);
+ } else {
+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDIL::CONTINUE, passRep, DL);
+ }
+
+ CFGTraits::insertInstrEnd(contingBlk, AMDIL::ENDIF, passRep, DL);
+ } else {
+ int branchOpcode =
+ trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
+ : CFGTraits::getContinueZeroOpcode(oldOpcode);
+
+ CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
+ }
+
+ //contingBlk->eraseFromParent(branchInstr);
+ branchInstr->eraseFromParent();
+ } else {
+    /* If we've arrived here, the branch instruction has already been erased.
+     * Walk back up the basic block to find the last reference to our debug
+     * location; since it was inserted close to this point, it should be
+     * representative. */
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDIL::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
+ } else {
+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDIL::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
+ }
+ } //else
+
+} //settleLoopcontBlock
+
+// The BBs in exitBlkSet have been determined to be on the break-path for
+// loopRep. Before we can place their code inside the loop body of loopRep,
+// check whether those BBs were earlier determined to be cont-BBs for
+// parentLoopRep.
+// If so, generate a new BB newBlk and
+//   (1) make newBlk the common successor of the BBs in exitBlkSet
+//   (2) change the continue-instr in the BBs in exitBlkSet to a break-instr
+//   (3) generate a continue-instr in newBlk
+//
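+// An illustrative sketch: for two exit blocks E1 and E2 whose single paths end
+// in a CONTINUE, the relocation below produces roughly
+//   E1: ...            (CONTINUE removed)  ---> newBlk
+//   E2: ...            (CONTINUE removed)  ---> newBlk
+//   newBlk: CONTINUE
+//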
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
+ LoopT *loopRep,
+ std::set<BlockT *> &exitBlkSet,
+ BlockT *exitLandBlk) {
+ std::set<BlockT *> endBlkSet;
+
+// BlockT *parentLoopHead = parentLoopRep->getHeader();
+
+
+ for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
+ iterEnd = exitBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *exitBlk = *iter;
+ BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
+
+ if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
+ return NULL;
+
+ endBlkSet.insert(endBlk);
+ }
+
+ BlockT *newBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(newBlk); //insert to function
+ CFGTraits::insertInstrEnd(newBlk, AMDIL::CONTINUE, passRep);
+ SHOWNEWBLK(newBlk, "New continue block: ");
+
+ for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
+ iterEnd = endBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *endBlk = *iter;
+ InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
+ if (contInstr) {
+ contInstr->eraseFromParent();
+ }
+ endBlk->addSuccessor(newBlk);
+ if (DEBUGME) {
+ errs() << "Add new continue Block to BB"
+ << endBlk->getNumber() << " successors\n";
+ }
+ }
+
+ return newBlk;
+} //relocateLoopcontBlock
+
+
+// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as the
+// LoopLandBlock. This BB branches on the loop endBranchInit register to the
+// paths corresponding to the loop's exiting branches.
+
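+// An illustrative sketch with three exiting paths: the chain of branch blocks
+// built below dispatches on endBranchReg roughly as
+//   newLandBlk:  if (endBranchReg == 0) goto exitBlks[0] else goto branch1
+//   branch1:     if (endBranchReg == 1) goto exitBlks[1] else goto exitBlks[2]
+// where exitingBlks[i] sets endBranchReg = i before jumping to newLandBlk
+// (exitingBlks[0] relies on the reg = 0 initialization before the loop).
+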
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
+ BlockTSmallerVector &exitingBlks,
+ BlockTSmallerVector &exitBlks) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+
+ RegiT endBranchReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass));
+ assert(endBranchReg >= 0);
+
+ // reg = 0 before entering the loop
+ addLoopEndbranchInitReg(loopRep, endBranchReg);
+
+ uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
+ assert(numBlks >=2 && numBlks == exitBlks.size());
+
+ BlockT *preExitingBlk = exitingBlks[0];
+ BlockT *preExitBlk = exitBlks[0];
+ BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(preBranchBlk); //insert to function
+ SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
+
+ BlockT *newLandBlk = preBranchBlk;
+
+ CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
+ newLandBlk);
+ preExitingBlk->removeSuccessor(preExitBlk);
+ preExitingBlk->addSuccessor(newLandBlk);
+
+ //it is redundant to add reg = 0 to exitingBlks[0]
+
+  // For the 1..n-th exiting paths (the last iteration handles two paths),
+  // create the branch to the previous path and the current path.
+ for (uint32_t i = 1; i < numBlks; ++i) {
+ BlockT *curExitingBlk = exitingBlks[i];
+ BlockT *curExitBlk = exitBlks[i];
+ BlockT *curBranchBlk;
+
+ if (i == numBlks - 1) {
+ curBranchBlk = curExitBlk;
+ } else {
+ curBranchBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(curBranchBlk); //insert to function
+ SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
+ }
+
+ // Add reg = i to exitingBlks[i].
+ CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
+ endBranchReg, i);
+
+    // Remove the edge (exitingBlks[i], exitBlks[i]) and add the new edge
+    // (exitingBlks[i], newLandBlk).
+ CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
+ newLandBlk);
+ curExitingBlk->removeSuccessor(curExitBlk);
+ curExitingBlk->addSuccessor(newLandBlk);
+
+ // add to preBranchBlk the branch instruction:
+ // if (endBranchReg == preVal)
+ // preExitBlk
+ // else
+ // curBranchBlk
+ //
+ // preValReg = i - 1
+
+ DebugLoc DL;
+ RegiT preValReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass));
+ BuildMI(preBranchBlk, DL, tii->get(AMDIL::LOADCONST_i32), preValReg)
+ .addImm(i - 1); //preVal
+
+ // condResReg = (endBranchReg == preValReg)
+ RegiT condResReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass));
+ BuildMI(preBranchBlk, DL, tii->get(AMDIL::IEQ), condResReg)
+ .addReg(endBranchReg).addReg(preValReg);
+
+ BuildMI(preBranchBlk, DL, tii->get(AMDIL::BRANCH_COND_i32))
+ .addMBB(preExitBlk).addReg(condResReg);
+
+ preBranchBlk->addSuccessor(preExitBlk);
+ preBranchBlk->addSuccessor(curBranchBlk);
+
+ // Update preExitingBlk, preExitBlk, preBranchBlk.
+ preExitingBlk = curExitingBlk;
+ preExitBlk = curExitBlk;
+ preBranchBlk = curBranchBlk;
+
+ } //end for 1 .. n blocks
+
+ return newLandBlk;
+} //addLoopEndbranchBlock
+
+template<class PassT>
+typename CFGStructurizer<PassT>::PathToKind
+CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
+ bool allowSideEntry) {
+ assert(dstBlk);
+
+ if (srcBlk == dstBlk) {
+ return SinglePath_InPath;
+ }
+
+ while (srcBlk && srcBlk->succ_size() == 1) {
+ srcBlk = *srcBlk->succ_begin();
+ if (srcBlk == dstBlk) {
+ return SinglePath_InPath;
+ }
+
+ if (!allowSideEntry && srcBlk->pred_size() > 1) {
+ return Not_SinglePath;
+ }
+ }
+
+ if (srcBlk && srcBlk->succ_size()==0) {
+ return SinglePath_NotInPath;
+ }
+
+ return Not_SinglePath;
+} //singlePathTo
+
+// If there is a single path from srcBlk to dstBlk, return the last block before
+// dstBlk. If there is a single path from srcBlk to the function end that does
+// not pass through dstBlk, return the last block in that path. Otherwise,
+// return NULL.
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
+ bool allowSideEntry) {
+ assert(dstBlk);
+
+ if (srcBlk == dstBlk) {
+ return srcBlk;
+ }
+
+ if (srcBlk->succ_size() == 0) {
+ return srcBlk;
+ }
+
+ while (srcBlk && srcBlk->succ_size() == 1) {
+ BlockT *preBlk = srcBlk;
+
+ srcBlk = *srcBlk->succ_begin();
+ if (srcBlk == NULL) {
+ return preBlk;
+ }
+
+ if (!allowSideEntry && srcBlk->pred_size() > 1) {
+ return NULL;
+ }
+ }
+
+ if (srcBlk && srcBlk->succ_size()==0) {
+ return srcBlk;
+ }
+
+ return NULL;
+
+} //singlePathEnd
+
+template<class PassT>
+int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
+ BlockT *dstBlk) {
+ int cloned = 0;
+ assert(preBlk->isSuccessor(srcBlk));
+ while (srcBlk && srcBlk != dstBlk) {
+ assert(srcBlk->succ_size() == 1);
+ if (srcBlk->pred_size() > 1) {
+ srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
+ ++cloned;
+ }
+
+ preBlk = srcBlk;
+ srcBlk = *srcBlk->succ_begin();
+ }
+
+ return cloned;
+} //cloneOnSideEntryTo
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
+ BlockT *predBlk) {
+ assert(predBlk->isSuccessor(curBlk) &&
+ "succBlk is not a prececessor of curBlk");
+
+ BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions
+ CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
+ //srcBlk, oldBlk, newBlk
+
+ predBlk->removeSuccessor(curBlk);
+ predBlk->addSuccessor(cloneBlk);
+
+ // add all successor to cloneBlk
+ CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
+
+ numClonedInstr += curBlk->size();
+
+ if (DEBUGME) {
+ errs() << "Cloned block: " << "BB"
+ << curBlk->getNumber() << "size " << curBlk->size() << "\n";
+ }
+
+ SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
+
+ return cloneBlk;
+} //cloneBlockForPredecessor
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
+ BlockT *exitingBlk) {
+ BlockT *exitBlk = NULL;
+
+ for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
+ iterSuccEnd = exitingBlk->succ_end();
+ iterSucc != iterSuccEnd; ++iterSucc) {
+ BlockT *curBlk = *iterSucc;
+ if (!loopRep->contains(curBlk)) {
+ assert(exitBlk == NULL);
+ exitBlk = curBlk;
+ }
+ }
+
+ assert(exitBlk != NULL);
+
+ return exitBlk;
+} //exitingBlock2ExitBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
+ BlockT *dstBlk,
+ InstrIterator insertPos) {
+ InstrIterator spliceEnd;
+ //look for the input branchinstr, not the AMDIL branchinstr
+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
+ if (branchInstr == NULL) {
+ if (DEBUGME) {
+ errs() << "migrateInstruction don't see branch instr\n" ;
+ }
+ spliceEnd = srcBlk->end();
+ } else {
+ if (DEBUGME) {
+ errs() << "migrateInstruction see branch instr\n" ;
+ branchInstr->dump();
+ }
+ spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
+ }
+ if (DEBUGME) {
+ errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
+ << "srcSize = " << srcBlk->size() << "\n";
+ }
+
+ //splice insert before insertPos
+ dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
+
+ if (DEBUGME) {
+ errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
+ << "srcSize = " << srcBlk->size() << "\n";
+ }
+} //migrateInstruction
+
+// normalizeInfiniteLoopExit changes
+// B1:
+// uncond_br LoopHeader
+//
+// to
+// B1:
+// cond_br 1 LoopHeader dummyExit
+// and returns the newly added dummy exit block.
+//
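+// An illustrative sketch: the replacement branch in the loop latch is built as
+//   immReg = LOADCONST_i32 1
+//   BRANCH_COND_i32 LoopHeader, immReg
+// and dummyExit is added as a successor of the latch, so the loop gains a
+// (never taken) exit edge.
+//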
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
+ BlockT *loopHeader;
+ BlockT *loopLatch;
+ loopHeader = LoopRep->getHeader();
+ loopLatch = LoopRep->getLoopLatch();
+ BlockT *dummyExitBlk = NULL;
+ if (loopHeader!=NULL && loopLatch!=NULL) {
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
+ if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
+ dummyExitBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(dummyExitBlk); //insert to function
+ SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
+
+ if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
+
+ typename BlockT::iterator insertPos =
+ CFGTraits::getInstrPos(loopLatch, branchInstr);
+ unsigned immReg =
+ funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass);
+ CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
+ InstrT *newInstr =
+ CFGTraits::insertInstrBefore(insertPos, AMDIL::BRANCH_COND_i32, passRep);
+ MachineInstrBuilder(newInstr).addMBB(loopHeader).addReg(immReg, false);
+
+ SHOWNEWINSTR(newInstr);
+
+ branchInstr->eraseFromParent();
+ loopLatch->addSuccessor(dummyExitBlk);
+ }
+ }
+
+ return dummyExitBlk;
+} //normalizeInfiniteLoopExit
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
+ InstrT *branchInstr;
+
+  // I saw two unconditional branches in one basic block in the example
+  // test_fc_do_while_or.c; the upstream needs to be fixed so this loop can be
+  // removed.
+ while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
+ && CFGTraits::isUncondBranch(branchInstr)) {
+ if (DEBUGME) {
+ errs() << "Removing unconditional branch instruction" ;
+ branchInstr->dump();
+ }
+ branchInstr->eraseFromParent();
+ }
+} //removeUnconditionalBranch
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
+ if (srcBlk->succ_size() == 2) {
+ BlockT *blk1 = *srcBlk->succ_begin();
+ BlockT *blk2 = *(++srcBlk->succ_begin());
+
+ if (blk1 == blk2) {
+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
+ assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
+ if (DEBUGME) {
+ errs() << "Removing unneeded conditional branch instruction" ;
+ branchInstr->dump();
+ }
+ branchInstr->eraseFromParent();
+ SHOWNEWBLK(blk1, "Removing redundant successor");
+ srcBlk->removeSuccessor(blk1);
+ }
+ }
+} //removeRedundantConditionalBranch
+
+template<class PassT>
+void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
+ DEFAULT_VEC_SLOTS> &retBlks) {
+ BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(dummyExitBlk); //insert to function
+ CFGTraits::insertInstrEnd(dummyExitBlk, AMDIL::RETURN, passRep);
+
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
+ retBlks.begin(),
+ iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
+ if (curInstr) {
+ curInstr->eraseFromParent();
+ }
+#if 0
+ if (curBlk->size()==0 && curBlk->pred_size() == 1) {
+ if (DEBUGME) {
+ errs() << "Replace empty block BB" << curBlk->getNumber()
+ << " with dummyExitBlock\n";
+ }
+ BlockT *predb = *curBlk->pred_begin();
+ predb->removeSuccessor(curBlk);
+ curBlk = predb;
+ } //handle empty curBlk
+#endif
+ curBlk->addSuccessor(dummyExitBlk);
+ if (DEBUGME) {
+ errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
+ << " successors\n";
+ }
+ } //for
+
+ SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
+} //addDummyExitBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
+ while (srcBlk->succ_size()) {
+ srcBlk->removeSuccessor(*srcBlk->succ_begin());
+ }
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
+ BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
+
+ if (srcBlkInfo == NULL) {
+ srcBlkInfo = new BlockInfo();
+ }
+
+ srcBlkInfo->sccNum = sccNum;
+}
+
+template<class PassT>
+int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
+ BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
+ return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
+ if (DEBUGME) {
+ errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
+ }
+
+ BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
+
+ if (srcBlkInfo == NULL) {
+ srcBlkInfo = new BlockInfo();
+ }
+
+ srcBlkInfo->isRetired = true;
+ //int i = srcBlk->succ_size();
+ //int j = srcBlk->pred_size();
+ assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
+ && "can't retire block yet");
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
+ BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
+ return (srcBlkInfo && srcBlkInfo->isRetired);
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ while (loopRep && loopRep->getHeader() == curBlk) {
+ LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
+
+ if(loopLand == NULL)
+ return true;
+
+ BlockT *landBlk = loopLand->landBlk;
+ assert(landBlk);
+ if (!isRetiredBlock(landBlk)) {
+ return true;
+ }
+
+ loopRep = loopRep->getParentLoop();
+ }
+
+ return false;
+} //isActiveLoophead
+
+template<class PassT>
+bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
+ const unsigned blockSizeThreshold = 30;
+ const unsigned cloneInstrThreshold = 100;
+
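+  // A worked example of this heuristic: a 40-instruction block with 3
+  // predecessors gives 40 > 30 but 40 * (3 - 1) = 80 <= 100, so
+  // needMigrateBlock returns false; with 4 predecessors, 40 * 3 = 120 > 100
+  // and it returns true.
+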
+ bool multiplePreds = blk && (blk->pred_size() > 1);
+
+ if(!multiplePreds)
+ return false;
+
+ unsigned blkSize = blk->size();
+ return ((blkSize > blockSizeThreshold)
+ && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
+} //needMigrateBlock
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
+ BlockTSmallerVector &exitBlks,
+ std::set<BlockT *> &exitBlkSet) {
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks
+
+ for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
+ predIterEnd = landBlk->pred_end();
+ predIter != predIterEnd; ++predIter) {
+ BlockT *curBlk = *predIter;
+ if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
+ inpathBlks.push_back(curBlk);
+ }
+ } //for
+
+ //if landBlk has predecessors that are not in the given loop,
+ //create a new block
+ BlockT *newLandBlk = landBlk;
+ if (inpathBlks.size() != landBlk->pred_size()) {
+ newLandBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(newLandBlk); //insert to function
+ newLandBlk->addSuccessor(landBlk);
+ for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
+ inpathBlks.begin(),
+ iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
+ //srcBlk, oldBlk, newBlk
+ curBlk->removeSuccessor(landBlk);
+ curBlk->addSuccessor(newLandBlk);
+ }
+ for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
+ if (exitBlks[i] == landBlk) {
+ exitBlks[i] = newLandBlk;
+ }
+ }
+ SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
+ }
+
+ setLoopLandBlock(loopRep, newLandBlk);
+
+ return newLandBlk;
+} // recordLoopbreakLand
+
+template<class PassT>
+void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ assert(theEntry->landBlk == NULL);
+
+ if (blk == NULL) {
+ blk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(blk); //insert to function
+ SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
+ }
+
+ theEntry->landBlk = blk;
+
+ if (DEBUGME) {
+ errs() << "setLoopLandBlock loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " landing-block = BB" << blk->getNumber() << "\n";
+ }
+} // setLoopLandBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+
+ theEntry->breakOnRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopBreakOnReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopBreakOnReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->contOnRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopContOnReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopContOnReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->breakInitRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopBreakInitReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopBreakInitReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->contInitRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopContInitReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopContInitReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
+ RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->endbranchInitRegs.insert(regNum);
+
+ if (DEBUGME)
+ {
+ errs() << "addLoopEndbranchInitReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopEndbranchInitReg
+
+template<class PassT>
+typename CFGStructurizer<PassT>::LoopLandInfo *
+CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ return theEntry;
+} // getLoopLandInfo
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ return theEntry ? theEntry->landBlk : NULL;
+} // getLoopLandBlock
+
+
+template<class PassT>
+bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ if (loopRep == NULL)
+ return false;
+
+ BlockT *loopHeader = loopRep->getHeader();
+
+ return curBlk->isSuccessor(loopHeader);
+
+} //hasBackEdge
+
+template<class PassT>
+unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
+ return loopRep ? loopRep->getLoopDepth() : 0;
+} //getLoopDepth
+
+template<class PassT>
+int CFGStructurizer<PassT>::countActiveBlock
+(typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
+ typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
+ int count = 0;
+ while (iterStart != iterEnd) {
+ if (!isRetiredBlock(*iterStart)) {
+ ++count;
+ }
+ ++iterStart;
+ }
+
+ return count;
+} //countActiveBlock
+
+// This is a workaround because findNearestCommonDominator is not available for
+// the post dominator tree; a proper fix should go into Dominators.h.
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT*
+CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
+
+ if (postDomTree->dominates(blk1, blk2)) {
+ return blk1;
+ }
+ if (postDomTree->dominates(blk2, blk1)) {
+ return blk2;
+ }
+
+ DomTreeNodeT *node1 = postDomTree->getNode(blk1);
+ DomTreeNodeT *node2 = postDomTree->getNode(blk2);
+
+ // Handle newly cloned node.
+ if (node1 == NULL && blk1->succ_size() == 1) {
+ return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
+ }
+ if (node2 == NULL && blk2->succ_size() == 1) {
+ return findNearestCommonPostDom(blk1, *blk2->succ_begin());
+ }
+
+ if (node1 == NULL || node2 == NULL) {
+ return NULL;
+ }
+
+ node1 = node1->getIDom();
+ while (node1) {
+ if (postDomTree->dominates(node1, node2)) {
+ return node1->getBlock();
+ }
+ node1 = node1->getIDom();
+ }
+
+ return NULL;
+}
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::findNearestCommonPostDom
+(typename std::set<BlockT *> &blks) {
+ BlockT *commonDom;
+ typename std::set<BlockT *>::const_iterator iter = blks.begin();
+ typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
+ for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
+ BlockT *curBlk = *iter;
+ if (curBlk != commonDom) {
+ commonDom = findNearestCommonPostDom(curBlk, commonDom);
+ }
+ }
+
+ if (DEBUGME) {
+ errs() << "Common post dominator for exit blocks is ";
+ if (commonDom) {
+ errs() << "BB" << commonDom->getNumber() << "\n";
+ } else {
+ errs() << "NULL\n";
+ }
+ }
+
+ return commonDom;
+} //findNearestCommonPostDom
+
+} //end namespace llvm
+
+//todo: move-end
+
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructurizer for AMDIL
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm
+{
+class AMDILCFGStructurizer : public MachineFunctionPass
+{
+public:
+ typedef MachineInstr InstructionType;
+ typedef MachineFunction FunctionType;
+ typedef MachineBasicBlock BlockType;
+ typedef MachineLoopInfo LoopinfoType;
+ typedef MachineDominatorTree DominatortreeType;
+ typedef MachinePostDominatorTree PostDominatortreeType;
+ typedef MachineDomTreeNode DomTreeNodeType;
+ typedef MachineLoop LoopType;
+//private:
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+//public:
+// static char ID;
+
+public:
+ AMDILCFGStructurizer(char &pid, TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
+ const TargetInstrInfo *getTargetInstrInfo() const;
+ //bool runOnMachineFunction(MachineFunction &F);
+
+private:
+
+}; //end of class AMDILCFGStructurizer
+
+//char AMDILCFGStructurizer::ID = 0;
+} //end of namespace llvm
+AMDILCFGStructurizer::AMDILCFGStructurizer(char &pid, TargetMachine &tm
+ AMDIL_OPT_LEVEL_DECL)
+: MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()) {
+}
+
+const TargetInstrInfo *AMDILCFGStructurizer::getTargetInstrInfo() const {
+ return TII;
+}
+//===----------------------------------------------------------------------===//
+//
+// CFGPrepare
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm
+{
+class AMDILCFGPrepare : public AMDILCFGStructurizer
+{
+public:
+ static char ID;
+
+public:
+ AMDILCFGPrepare(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
+
+ virtual const char *getPassName() const;
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+private:
+
+}; //end of class AMDILCFGPrepare
+
+char AMDILCFGPrepare::ID = 0;
+} //end of namespace llvm
+
+AMDILCFGPrepare::AMDILCFGPrepare(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
+ : AMDILCFGStructurizer(ID, tm AMDIL_OPT_LEVEL_VAR)
+{
+}
+const char *AMDILCFGPrepare::getPassName() const {
+ return "AMD IL Control Flow Graph Preparation Pass";
+}
+
+void AMDILCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// CFGPerform
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm
+{
+class AMDILCFGPerform : public AMDILCFGStructurizer
+{
+public:
+ static char ID;
+
+public:
+ AMDILCFGPerform(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
+ virtual const char *getPassName() const;
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ bool runOnMachineFunction(MachineFunction &F);
+
+private:
+
+}; //end of class AMDILCFGPerform
+
+char AMDILCFGPerform::ID = 0;
+} //end of namespace llvm
+
+AMDILCFGPerform::AMDILCFGPerform(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
+: AMDILCFGStructurizer(ID, tm AMDIL_OPT_LEVEL_VAR)
+{
+}
+
+const char *AMDILCFGPerform::getPassName() const {
+ return "AMD IL Control Flow Graph structurizer Pass";
+}
+
+void AMDILCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructTraits<AMDILCFGStructurizer>
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct
+{
+// this class is tailored to the AMDIL backend
+template<>
+struct CFGStructTraits<AMDILCFGStructurizer>
+{
+ typedef int RegiT;
+
+ static int getBreakNzeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::BREAK_LOGICALNZ);
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
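+  // Note: ExpandCaseToAllScalarReturn is assumed to expand to one case per
+  // scalar type suffix, e.g. (illustrative)
+  //   case AMDIL::BRANCH_COND_i32: return AMDIL::BREAK_LOGICALNZ_i32;
+  // so these getXxxOpcode helpers are plain opcode translations that keep the
+  // type suffix.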
+
+ static int getBreakZeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::BREAK_LOGICALZ);
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ static int getBranchNzeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::IF_LOGICALNZ);
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ static int getBranchZeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::IF_LOGICALZ);
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ static int getContinueNzeroOpcode(int oldOpcode)
+ {
+ switch(oldOpcode) {
+ ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::CONTINUE_LOGICALNZ);
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ static int getContinueZeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::CONTINUE_LOGICALZ);
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+// the explicitly represented branch target is the true branch target
+#define getExplicitBranch getTrueBranch
+#define setExplicitBranch setTrueBranch
+
+ static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
+ return instr->getOperand(0).getMBB();
+ }
+
+ static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
+ instr->getOperand(0).setMBB(blk);
+ }
+
+ static MachineBasicBlock *
+ getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
+ assert(blk->succ_size() == 2);
+ MachineBasicBlock *trueBranch = getTrueBranch(instr);
+ MachineBasicBlock::succ_iterator iter = blk->succ_begin();
+ MachineBasicBlock::succ_iterator iterNext = iter;
+ ++iterNext;
+
+ return (*iter == trueBranch) ? *iterNext : *iter;
+ }
+
+ static bool isCondBranch(MachineInstr *instr) {
+ switch (instr->getOpcode()) {
+ ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND);
+ break;
+ default:
+ return false;
+ }
+ return true;
+ }
+
+ static bool isUncondBranch(MachineInstr *instr) {
+ switch (instr->getOpcode()) {
+ case AMDIL::BRANCH:
+ break;
+ default:
+ return false;
+ }
+ return true;
+ }
+
+ static bool isPhimove(MachineInstr *instr) {
+ switch (instr->getOpcode()) {
+ ExpandCaseToAllTypes(AMDIL::MOVE);
+ break;
+ default:
+ return false;
+ }
+ return true;
+ }
+
+ static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
+    //get the DebugLoc from the last instruction in the block that has debug info
+ DebugLoc DL;
+ for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getDebugLoc().isUnknown() == false) {
+ DL = instr->getDebugLoc();
+ }
+ }
+ return DL;
+ }
+
+ static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+ MachineInstr *instr = &*iter;
+ if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
+ return instr;
+ }
+ return NULL;
+ }
+
+ // The correct naming for this is getPossibleLoopendBlockBranchInstr.
+ //
+ // BB with backward-edge could have move instructions after the branch
+  // instruction.  Such move instructions "belong to" the loop backward-edge.
+ //
+ static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
+ for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
+ iterEnd = blk->rend(); iter != iterEnd; ++iter) {
+ // FIXME: Simplify
+ MachineInstr *instr = &*iter;
+ if (instr) {
+ if (isCondBranch(instr) || isUncondBranch(instr)) {
+ return instr;
+ } else if (!isPhimove(instr)) {
+ break;
+ }
+ }
+ }
+ return NULL;
+ }
+
+ static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+ if (iter != blk->rend()) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getOpcode() == AMDIL::RETURN) {
+ return instr;
+ }
+ }
+ return NULL;
+ }
+
+ static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+ if (iter != blk->rend()) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getOpcode() == AMDIL::CONTINUE) {
+ return instr;
+ }
+ }
+ return NULL;
+ }
+
+ static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
+ for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
+ MachineInstr *instr = &(*iter);
+ if ((instr->getOpcode() == AMDIL::BREAK_LOGICALNZ_i32) || (instr->getOpcode() == AMDIL::BREAK_LOGICALZ_i32)) {
+ return instr;
+ }
+ }
+ return NULL;
+ }
+
+ static bool isReturnBlock(MachineBasicBlock *blk) {
+ MachineInstr *instr = getReturnInstr(blk);
+ bool isReturn = (blk->succ_size() == 0);
+ if (instr) {
+ assert(isReturn);
+ } else if (isReturn) {
+ if (DEBUGME) {
+ errs() << "BB" << blk->getNumber()
+ <<" is return block without RETURN instr\n";
+ }
+ }
+
+ return isReturn;
+ }
+
+ static MachineBasicBlock::iterator
+ getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
+ assert(instr->getParent() == blk && "instruction doesn't belong to block");
+ MachineBasicBlock::iterator iter = blk->begin();
+ MachineBasicBlock::iterator iterEnd = blk->end();
+ while (&(*iter) != instr && iter != iterEnd) {
+ ++iter;
+ }
+
+ assert(iter != iterEnd);
+ return iter;
+ }//getInstrPos
+
+ static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
+ AMDILCFGStructurizer *passRep) {
+ return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
+ } //insertInstrBefore
+
+ static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
+ AMDILCFGStructurizer *passRep, DebugLoc DL) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ MachineBasicBlock::iterator res;
+ if (blk->begin() != blk->end()) {
+ blk->insert(blk->begin(), newInstr);
+ } else {
+ blk->push_back(newInstr);
+ }
+
+ SHOWNEWINSTR(newInstr);
+
+ return newInstr;
+ } //insertInstrBefore
+
+ static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
+ AMDILCFGStructurizer *passRep) {
+ insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
+ } //insertInstrEnd
+
+ static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
+ AMDILCFGStructurizer *passRep, DebugLoc DL) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineInstr *newInstr = blk->getParent()
+ ->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ blk->push_back(newInstr);
+ //assume the instruction doesn't take any reg operand ...
+
+ SHOWNEWINSTR(newInstr);
+ } //insertInstrEnd
+
+ static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
+ int newOpcode,
+ AMDILCFGStructurizer *passRep) {
+ MachineInstr *oldInstr = &(*instrPos);
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineBasicBlock *blk = oldInstr->getParent();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
+ DebugLoc());
+
+ blk->insert(instrPos, newInstr);
+ //assume the instruction doesn't take any reg operand ...
+
+ SHOWNEWINSTR(newInstr);
+ return newInstr;
+ } //insertInstrBefore
+
+ static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
+ int newOpcode,
+ AMDILCFGStructurizer *passRep,
+ DebugLoc DL) {
+ MachineInstr *oldInstr = &(*instrPos);
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineBasicBlock *blk = oldInstr->getParent();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
+ DL);
+
+ blk->insert(instrPos, newInstr);
+ MachineInstrBuilder(newInstr).addReg(oldInstr->getOperand(1).getReg(),
+ false);
+
+ SHOWNEWINSTR(newInstr);
+ //erase later oldInstr->eraseFromParent();
+ } //insertCondBranchBefore
+
+ static void insertCondBranchBefore(MachineBasicBlock *blk,
+ MachineBasicBlock::iterator insertPos,
+ int newOpcode,
+ AMDILCFGStructurizer *passRep,
+ RegiT regNum,
+ DebugLoc DL) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ //insert before
+ blk->insert(insertPos, newInstr);
+ MachineInstrBuilder(newInstr).addReg(regNum, false);
+
+ SHOWNEWINSTR(newInstr);
+ } //insertCondBranchBefore
+
+ static void insertCondBranchEnd(MachineBasicBlock *blk,
+ int newOpcode,
+ AMDILCFGStructurizer *passRep,
+ RegiT regNum) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
+
+ blk->push_back(newInstr);
+ MachineInstrBuilder(newInstr).addReg(regNum, false);
+
+ SHOWNEWINSTR(newInstr);
+ } //insertCondBranchEnd
+
+
+ static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
+ AMDILCFGStructurizer *passRep,
+ RegiT regNum, int regVal) {
+ MachineInstr *oldInstr = &(*instrPos);
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineBasicBlock *blk = oldInstr->getParent();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(AMDIL::LOADCONST_i32),
+ DebugLoc());
+ MachineInstrBuilder(newInstr).addReg(regNum, RegState::Define); //set target
+ MachineInstrBuilder(newInstr).addImm(regVal); //set src value
+
+ blk->insert(instrPos, newInstr);
+
+ SHOWNEWINSTR(newInstr);
+ } //insertAssignInstrBefore
+
+ static void insertAssignInstrBefore(MachineBasicBlock *blk,
+ AMDILCFGStructurizer *passRep,
+ RegiT regNum, int regVal) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(AMDIL::LOADCONST_i32),
+ DebugLoc());
+ MachineInstrBuilder(newInstr).addReg(regNum, RegState::Define); //set target
+ MachineInstrBuilder(newInstr).addImm(regVal); //set src value
+
+ if (blk->begin() != blk->end()) {
+ blk->insert(blk->begin(), newInstr);
+ } else {
+ blk->push_back(newInstr);
+ }
+
+ SHOWNEWINSTR(newInstr);
+
+  } //insertAssignInstrBefore
+
+ static void insertCompareInstrBefore(MachineBasicBlock *blk,
+ MachineBasicBlock::iterator instrPos,
+ AMDILCFGStructurizer *passRep,
+ RegiT dstReg, RegiT src1Reg,
+ RegiT src2Reg) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(AMDIL::IEQ), DebugLoc());
+
+ MachineInstrBuilder(newInstr).addReg(dstReg, RegState::Define); //set target
+ MachineInstrBuilder(newInstr).addReg(src1Reg); //set src value
+ MachineInstrBuilder(newInstr).addReg(src2Reg); //set src value
+
+ blk->insert(instrPos, newInstr);
+ SHOWNEWINSTR(newInstr);
+
+ } //insertCompareInstrBefore
+
+ static void cloneSuccessorList(MachineBasicBlock *dstBlk,
+ MachineBasicBlock *srcBlk) {
+ for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
+ iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
+ dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of
+ }
+ } //cloneSuccessorList
+
+ static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
+ MachineFunction *func = srcBlk->getParent();
+ MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
+ func->push_back(newBlk); //insert to function
+ //newBlk->setNumber(srcBlk->getNumber());
+ for (MachineBasicBlock::iterator iter = srcBlk->begin(),
+ iterEnd = srcBlk->end();
+ iter != iterEnd; ++iter) {
+ MachineInstr *instr = func->CloneMachineInstr(iter);
+ // This is a workaround for LLVM bugzilla 8420 because CloneMachineInstr
+ // does not clone the AsmPrinterFlags.
+ instr->setAsmPrinterFlag(
+ (llvm::MachineInstr::CommentFlag)iter->getAsmPrinterFlags());
+ newBlk->push_back(instr);
+ }
+ return newBlk;
+ }
+
+  //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
+  //the AMDIL branch instruction is not recognized as a terminator. Fix this
+  //and retire this routine.
+ static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
+ MachineBasicBlock *oldBlk,
+ MachineBasicBlock *newBlk) {
+ MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
+ if (branchInstr && isCondBranch(branchInstr) &&
+ getExplicitBranch(branchInstr) == oldBlk) {
+ setExplicitBranch(branchInstr, newBlk);
+ }
+ }
+
+ static void wrapup(MachineBasicBlock *entryBlk) {
+ assert((!entryBlk->getParent()->getJumpTableInfo()
+ || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
+ && "found a jump table");
+
+ //collect continue right before endloop
+ SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
+ MachineBasicBlock::iterator pre = entryBlk->begin();
+ MachineBasicBlock::iterator iterEnd = entryBlk->end();
+ MachineBasicBlock::iterator iter = pre;
+ while (iter != iterEnd) {
+ if (pre->getOpcode() == AMDIL::CONTINUE
+ && iter->getOpcode() == AMDIL::ENDLOOP) {
+ contInstr.push_back(pre);
+ }
+ pre = iter;
+ ++iter;
+ } //end while
+
+ //delete continue right before endloop
+ for (unsigned i = 0; i < contInstr.size(); ++i) {
+ contInstr[i]->eraseFromParent();
+ }
+
+    // TODO: fix up the jump table so a later phase won't be confused.
+    // if (jumpTableInfo->isEmpty() == false) { we would need to clean the
+    // jump table, but there isn't such an interface yet; alternatively,
+    // replace all the other blocks in the jump table with the entryBlk }
+
+ } //wrapup
+
+ static MachineDominatorTree *getDominatorTree(AMDILCFGStructurizer &pass) {
+ return &pass.getAnalysis<MachineDominatorTree>();
+ }
+
+ static MachinePostDominatorTree*
+ getPostDominatorTree(AMDILCFGStructurizer &pass) {
+ return &pass.getAnalysis<MachinePostDominatorTree>();
+ }
+
+ static MachineLoopInfo *getLoopInfo(AMDILCFGStructurizer &pass) {
+ return &pass.getAnalysis<MachineLoopInfo>();
+ }
+}; // template class CFGStructTraits
+} //end of namespace llvm
+
+// createAMDILCFGPreparationPass- Returns a pass
+FunctionPass *llvm::createAMDILCFGPreparationPass(TargetMachine &tm
+ AMDIL_OPT_LEVEL_DECL) {
+ return new AMDILCFGPrepare(tm AMDIL_OPT_LEVEL_VAR);
+}
+
+bool AMDILCFGPrepare::runOnMachineFunction(MachineFunction &func) {
+ return llvmCFGStruct::CFGStructurizer<AMDILCFGStructurizer>().prepare(func,
+ *this);
+}
+
+// createAMDILCFGStructurizerPass- Returns a pass
+FunctionPass *llvm::createAMDILCFGStructurizerPass(TargetMachine &tm
+ AMDIL_OPT_LEVEL_DECL) {
+ return new AMDILCFGPerform(tm AMDIL_OPT_LEVEL_VAR);
+}
+
+bool AMDILCFGPerform::runOnMachineFunction(MachineFunction &func) {
+ return llvmCFGStruct::CFGStructurizer<AMDILCFGStructurizer>().run(func,
+ *this);
+}
+
+//end of file newline goes below
+
diff --git a/lib/Target/AMDIL/AMDILCodeEmitter.h b/lib/Target/AMDIL/AMDILCodeEmitter.h
new file mode 100644
index 00000000000..b0ea1455cf9
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILCodeEmitter.h
@@ -0,0 +1,46 @@
+//===-- AMDILCodeEmitter.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDILCODEEMITTER_H
+#define AMDILCODEEMITTER_H
+
+namespace llvm {
+
+ /* XXX: Temp HACK to work around tablegen name generation */
+ class AMDILCodeEmitter {
+ public:
+ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
+ virtual uint64_t getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const { return 0; }
+ virtual unsigned GPR4AlignEncode(const MachineInstr &MI,
+ unsigned OpNo) const {
+ return 0;
+ }
+ virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
+ unsigned OpNo) const {
+ return 0;
+ }
+ virtual uint64_t VOPPostEncode(const MachineInstr &MI,
+ uint64_t Value) const {
+ return Value;
+ }
+ virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
+ unsigned OpNo) const {
+ return 0;
+ }
+ };
+
+} // End namespace llvm
+
+#endif // AMDILCODEEMITTER_H
diff --git a/lib/Target/AMDIL/AMDILMCCodeEmitter.cpp b/lib/Target/AMDIL/AMDILMCCodeEmitter.cpp
new file mode 100644
index 00000000000..9366f2e7bcb
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILMCCodeEmitter.cpp
@@ -0,0 +1,158 @@
+//===---- AMDILMCCodeEmitter.cpp - Convert AMDIL text to AMDIL binary ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+//===---------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "amdil-emitter"
+#include "AMDIL.h"
+#include "AMDILInstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+#if 0
+namespace {
+ class AMDILMCCodeEmitter : public MCCodeEmitter {
+ AMDILMCCodeEmitter(const AMDILMCCodeEmitter &);// DO NOT IMPLEMENT
+ void operator=(const AMDILMCCodeEmitter &); // DO NOT IMPLEMENT
+ const TargetMachine &TM;
+ const TargetInstrInfo &TII;
+ MCContext &Ctx;
+ bool Is64BitMode;
+ public:
+ AMDILMCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit);
+ ~AMDILMCCodeEmitter();
+ unsigned getNumFixupKinds() const;
+ const MCFixupKindInfo& getFixupKindInfo(MCFixupKind Kind) const;
+ static unsigned GetAMDILRegNum(const MCOperand &MO);
+ void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const;
+ void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
+ raw_ostream &OS) const;
+ void EmitImmediate(const MCOperand &Disp, unsigned ImmSize,
+ MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &os,
+ SmallVectorImpl<MCFixup> &Fixups, int ImmOffset = 0) const;
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ }; // class AMDILMCCodeEmitter
+} // anonymous namespace
+
+namespace llvm {
+ MCCodeEmitter *createAMDILMCCodeEmitter(const Target &,
+ TargetMachine &TM, MCContext &Ctx)
+ {
+ return new AMDILMCCodeEmitter(TM, Ctx, false);
+ }
+}
+
+AMDILMCCodeEmitter::AMDILMCCodeEmitter(TargetMachine &tm, MCContext &ctx
+ , bool is64Bit)
+: TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx)
+{
+ Is64BitMode = is64Bit;
+}
+
+AMDILMCCodeEmitter::~AMDILMCCodeEmitter()
+{
+}
+
+unsigned
+AMDILMCCodeEmitter::getNumFixupKinds() const
+{
+ return 0;
+}
+
+const MCFixupKindInfo &
+AMDILMCCodeEmitter::getFixupKindInfo(MCFixupKind Kind) const
+{
+// const static MCFixupKindInfo Infos[] = {};
+ if (Kind < FirstTargetFixupKind) {
+ return MCCodeEmitter::getFixupKindInfo(Kind);
+ }
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return MCCodeEmitter::getFixupKindInfo(Kind);
+ // return Infos[Kind - FirstTargetFixupKind];
+
+}
+
+void
+AMDILMCCodeEmitter::EmitByte(unsigned char C, unsigned &CurByte,
+ raw_ostream &OS) const
+{
+ OS << (char) C;
+ ++CurByte;
+}
+void
+AMDILMCCodeEmitter::EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
+ raw_ostream &OS) const
+{
+ // Output the constant in little endian byte order
+ for (unsigned i = 0; i != Size; ++i) {
+ EmitByte(Val & 255, CurByte, OS);
+ Val >>= 8;
+ }
+}
+void
+AMDILMCCodeEmitter::EmitImmediate(const MCOperand &DispOp, unsigned ImmSize,
+ MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const
+{
+ // If this is a simple integer displacement that doesn't require a
+ // relocation, emit it now and return.
+ if (DispOp.isImm()) {
+ EmitConstant(DispOp.getImm() + ImmOffset, ImmSize, CurByte, OS);
+ return;
+ }
+
+ // If we have an immoffset, add it to the expression
+ const MCExpr *Expr = DispOp.getExpr();
+
+ if (ImmOffset) {
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(ImmOffset, Ctx), Ctx);
+ }
+ // Emit a symbolic constant as a fixup and 4 zeros.
+ Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind));
+ // TODO: Why the 4 zeros?
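+ // (The zeros appear to be placeholder bytes that the fixup recorded above
+ // overwrites once it is resolved.)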
+ EmitConstant(0, ImmSize, CurByte, OS);
+}
+
+void
+AMDILMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const
+{
+#if 0
+ unsigned Opcode = MI.getOpcode();
+ const TargetInstrDesc &Desc = TII.get(Opcode);
+ unsigned TSFlags = Desc.TSFlags;
+
+ // Keep track of the current byte being emitted.
+ unsigned CurByte = 0;
+
+ unsigned NumOps = Desc.getNumOperands();
+ unsigned CurOp = 0;
+
+ unsigned char BaseOpcode = 0;
+#ifndef NDEBUG
+ // FIXME: Verify.
+ if (// !Desc.isVariadic() &&
+ CurOp != NumOps) {
+ errs() << "Cannot encode all operands of: ";
+ MI.dump();
+ errs() << '\n';
+ abort();
+ }
+#endif
+#endif
+}
+#endif
diff --git a/lib/Target/AMDIL/AMDILMachinePeephole.cpp b/lib/Target/AMDIL/AMDILMachinePeephole.cpp
new file mode 100644
index 00000000000..b8e536361f0
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILMachinePeephole.cpp
@@ -0,0 +1,173 @@
+//===-- AMDILMachinePeephole.cpp - AMDIL Machine Peephole Pass -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+
+
+#define DEBUG_TYPE "machine_peephole"
+#if !defined(NDEBUG)
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME (false)
+#endif
+
+#include "AMDIL.h"
+#include "AMDILSubtarget.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+namespace
+{
+ class AMDILMachinePeephole : public MachineFunctionPass
+ {
+ public:
+ static char ID;
+ AMDILMachinePeephole(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
+ //virtual ~AMDILMachinePeephole();
+ virtual const char*
+ getPassName() const;
+ virtual bool
+ runOnMachineFunction(MachineFunction &MF);
+ private:
+ void insertFence(MachineBasicBlock::iterator &MIB);
+ TargetMachine &TM;
+ bool mDebug;
+ }; // AMDILMachinePeephole
+ char AMDILMachinePeephole::ID = 0;
+} // anonymous namespace
+
+namespace llvm
+{
+ FunctionPass*
+ createAMDILMachinePeephole(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
+ {
+ return new AMDILMachinePeephole(tm AMDIL_OPT_LEVEL_VAR);
+ }
+} // llvm namespace
+
+AMDILMachinePeephole::AMDILMachinePeephole(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
+ : MachineFunctionPass(ID), TM(tm)
+{
+ mDebug = DEBUGME;
+}
+
+bool
+AMDILMachinePeephole::runOnMachineFunction(MachineFunction &MF)
+{
+ bool Changed = false;
+ const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>();
+ for (MachineFunction::iterator MBB = MF.begin(), MBE = MF.end();
+ MBB != MBE; ++MBB) {
+ MachineBasicBlock *mb = MBB;
+ for (MachineBasicBlock::iterator MIB = mb->begin(), MIE = mb->end();
+ MIB != MIE; ++MIB) {
+ MachineInstr *mi = MIB;
+ const char * name;
+ name = TM.getInstrInfo()->getName(mi->getOpcode());
+ switch (mi->getOpcode()) {
+ default:
+ if (isAtomicInst(TM.getInstrInfo(), mi)) {
+ // If we don't support the hardware accelerated address spaces,
+ // then the atomic needs to be transformed to the global atomic.
+ if (strstr(name, "_L_")
+ && STM->device()->usesSoftware(AMDILDeviceInfo::LocalMem)) {
+ BuildMI(*mb, MIB, mi->getDebugLoc(),
+ TM.getInstrInfo()->get(AMDIL::ADD_i32), AMDIL::R1011)
+ .addReg(mi->getOperand(1).getReg())
+ .addReg(AMDIL::T2);
+ mi->getOperand(1).setReg(AMDIL::R1011);
+ mi->setDesc(
+ TM.getInstrInfo()->get(
+ (mi->getOpcode() - AMDIL::ATOM_L_ADD) + AMDIL::ATOM_G_ADD));
+ } else if (strstr(name, "_R_")
+ && STM->device()->usesSoftware(AMDILDeviceInfo::RegionMem)) {
+ assert(!"Software region memory is not supported!");
+ mi->setDesc(
+ TM.getInstrInfo()->get(
+ (mi->getOpcode() - AMDIL::ATOM_R_ADD) + AMDIL::ATOM_G_ADD));
+ }
+ } else if ((isLoadInst(TM.getInstrInfo(), mi) || isStoreInst(TM.getInstrInfo(), mi)) && isVolatileInst(TM.getInstrInfo(), mi)) {
+ insertFence(MIB);
+ }
+ continue;
+ break;
+ case AMDIL::USHR_i16:
+ case AMDIL::USHR_v2i16:
+ case AMDIL::USHR_v4i16:
+ case AMDIL::USHRVEC_i16:
+ case AMDIL::USHRVEC_v2i16:
+ case AMDIL::USHRVEC_v4i16:
+ if (TM.getSubtarget<AMDILSubtarget>()
+ .device()->usesSoftware(AMDILDeviceInfo::ShortOps)) {
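+ // When 16 bit ops are only emulated, mask the value being shifted down
+ // to 16 bits first so that higher bits do not leak into the unsigned
+ // shift result.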
+ unsigned lReg = MF.getRegInfo()
+ .createVirtualRegister(&AMDIL::GPRI32RegClass);
+ unsigned Reg = MF.getRegInfo()
+ .createVirtualRegister(&AMDIL::GPRV4I32RegClass);
+ BuildMI(*mb, MIB, mi->getDebugLoc(),
+ TM.getInstrInfo()->get(AMDIL::LOADCONST_i32),
+ lReg).addImm(0xFFFF);
+ BuildMI(*mb, MIB, mi->getDebugLoc(),
+ TM.getInstrInfo()->get(AMDIL::BINARY_AND_v4i32),
+ Reg)
+ .addReg(mi->getOperand(1).getReg())
+ .addReg(lReg);
+ mi->getOperand(1).setReg(Reg);
+ }
+ break;
+ case AMDIL::USHR_i8:
+ case AMDIL::USHR_v2i8:
+ case AMDIL::USHR_v4i8:
+ case AMDIL::USHRVEC_i8:
+ case AMDIL::USHRVEC_v2i8:
+ case AMDIL::USHRVEC_v4i8:
+ if (TM.getSubtarget<AMDILSubtarget>()
+ .device()->usesSoftware(AMDILDeviceInfo::ByteOps)) {
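+ // Same idea as the 16 bit case above: when byte ops are emulated, mask
+ // the value being shifted down to 8 bits first.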
+ unsigned lReg = MF.getRegInfo()
+ .createVirtualRegister(&AMDIL::GPRI32RegClass);
+ unsigned Reg = MF.getRegInfo()
+ .createVirtualRegister(&AMDIL::GPRV4I32RegClass);
+ BuildMI(*mb, MIB, mi->getDebugLoc(),
+ TM.getInstrInfo()->get(AMDIL::LOADCONST_i32),
+ lReg).addImm(0xFF);
+ BuildMI(*mb, MIB, mi->getDebugLoc(),
+ TM.getInstrInfo()->get(AMDIL::BINARY_AND_v4i32),
+ Reg)
+ .addReg(mi->getOperand(1).getReg())
+ .addReg(lReg);
+ mi->getOperand(1).setReg(Reg);
+ }
+ break;
+ }
+ }
+ }
+ return Changed;
+}
+
+const char*
+AMDILMachinePeephole::getPassName() const
+{
+ return "AMDIL Generic Machine Peephole Optimization Pass";
+}
+
+void
+AMDILMachinePeephole::insertFence(MachineBasicBlock::iterator &MIB)
+{
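+ // Bracket the volatile memory operation with a FENCE instruction on each
+ // side so surrounding memory accesses are not reordered across it.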
+ MachineInstr *MI = MIB;
+ MachineInstr *fence = BuildMI(*(MI->getParent()->getParent()),
+ MI->getDebugLoc(),
+ TM.getInstrInfo()->get(AMDIL::FENCE)).addReg(1);
+
+ MI->getParent()->insert(MIB, fence);
+ fence = BuildMI(*(MI->getParent()->getParent()),
+ MI->getDebugLoc(),
+ TM.getInstrInfo()->get(AMDIL::FENCE)).addReg(1);
+ MIB = MI->getParent()->insertAfter(MIB, fence);
+}
diff --git a/lib/Target/AMDIL/AMDILPeepholeOptimizer.cpp b/lib/Target/AMDIL/AMDILPeepholeOptimizer.cpp
new file mode 100644
index 00000000000..5fe9f53c8c8
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILPeepholeOptimizer.cpp
@@ -0,0 +1,1138 @@
+//===-- AMDILPeepholeOptimizer.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "PeepholeOpt"
+#ifdef DEBUG
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME 0
+#endif
+
+#include "AMDILAlgorithms.tpp"
+#include "AMDILDevices.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+
+#include <sstream>
+
+#if 0
+STATISTIC(PointerAssignments, "Number of dynamic pointer "
+ "assigments discovered");
+STATISTIC(PointerSubtract, "Number of pointer subtractions discovered");
+#endif
+
+using namespace llvm;
+// The peephole optimization pass is used to do simple last-minute optimizations
+// that are required for correct code or to remove redundant functions.
+namespace {
+class LLVM_LIBRARY_VISIBILITY AMDILPeepholeOpt : public FunctionPass {
+public:
+ TargetMachine &TM;
+ static char ID;
+ AMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
+ ~AMDILPeepholeOpt();
+ const char *getPassName() const;
+ bool runOnFunction(Function &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+protected:
+private:
+ // Function to initiate all of the instruction level optimizations.
+ bool instLevelOptimizations(BasicBlock::iterator *inst);
+ // Quick check to see if we need to dump all of the pointers into the
+ // arena. If so, then we set all pointers to exist in the arena. This
+ // is a workaround for aliasing of pointers in a struct/union.
+ bool dumpAllIntoArena(Function &F);
+ // Because we don't want to invalidate any pointers while in
+ // safeNestedForEach, atomic conversions are pushed to a vector and handled
+ // later. This function does the conversions if required.
+ void doAtomicConversionIfNeeded(Function &F);
+ // Because __amdil_is_constant cannot be properly evaluated if
+ // optimizations are disabled, the calls are placed in a vector
+ // and processed after the __amdil_image* functions are handled,
+ // which should allow the __amdil_is_constant function to be
+ // evaluated correctly.
+ void doIsConstCallConversionIfNeeded();
+ bool mChanged;
+ bool mDebug;
+ bool mConvertAtomics;
+ CodeGenOpt::Level optLevel;
+ // Run a series of tests to see if we can optimize a CALL instruction.
+ bool optimizeCallInst(BasicBlock::iterator *bbb);
+ // A peephole optimization to optimize bit extract sequences.
+ bool optimizeBitExtract(Instruction *inst);
+ // A peephole optimization to optimize bit insert sequences.
+ bool optimizeBitInsert(Instruction *inst);
+ bool setupBitInsert(Instruction *base,
+ Instruction *&src,
+ Constant *&mask,
+ Constant *&shift);
+ // Expand the bit field insert instruction on versions of OpenCL that
+ // don't support it.
+ bool expandBFI(CallInst *CI);
+ // Expand the bit field mask instruction on version of OpenCL that
+ // don't support it.
+ bool expandBFM(CallInst *CI);
+ // On 7XX and 8XX hardware, we do not have 24 bit signed operations, so in
+ // that case we need to expand them. These functions check for the 24 bit
+ // builtins and then expand them.
+ bool isSigned24BitOps(CallInst *CI);
+ void expandSigned24BitOps(CallInst *CI);
+ // One optimization that can occur is that, if the required workgroup size is
+ // specified, then the result of get_local_size is known at compile time and
+ // can be returned accordingly.
+ bool isRWGLocalOpt(CallInst *CI);
+ // On Northern Islands cards, the division is slightly less accurate than on
+ // previous generations, so we need to utilize a more accurate division there.
+ // On all other cards we can translate the accurate divide to a normal divide.
+ bool convertAccurateDivide(CallInst *CI);
+ void expandAccurateDivide(CallInst *CI);
+ // If the alignment is set incorrectly, it can produce really inefficient
+ // code. This checks for this scenario and fixes it if possible.
+ bool correctMisalignedMemOp(Instruction *inst);
+
+ // If we are in no-opt mode, then we need to make sure that
+ // local samplers are properly propagated, as constant propagation
+ // doesn't occur and we need to know the value of kernel-defined
+ // samplers at compile time.
+ bool propagateSamplerInst(CallInst *CI);
+
+ LLVMContext *mCTX;
+ Function *mF;
+ const AMDILSubtarget *mSTM;
+ SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs;
+ SmallVector<CallInst *, 16> isConstVec;
+}; // class AMDILPeepholeOpt
+ char AMDILPeepholeOpt::ID = 0;
+} // anonymous namespace
+
+namespace llvm {
+ FunctionPass *
+ createAMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
+ {
+ return new AMDILPeepholeOpt(tm AMDIL_OPT_LEVEL_VAR);
+ }
+} // llvm namespace
+
+AMDILPeepholeOpt::AMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
+ : FunctionPass(ID), TM(tm)
+{
+ mDebug = DEBUGME;
+ optLevel = TM.getOptLevel();
+
+}
+
+AMDILPeepholeOpt::~AMDILPeepholeOpt()
+{
+}
+
+const char *
+AMDILPeepholeOpt::getPassName() const
+{
+ return "AMDIL PeepHole Optimization Pass";
+}
+
+bool
+containsPointerType(Type *Ty)
+{
+ if (!Ty) {
+ return false;
+ }
+ switch(Ty->getTypeID()) {
+ default:
+ return false;
+ case Type::StructTyID: {
+ const StructType *ST = dyn_cast<StructType>(Ty);
+ for (StructType::element_iterator stb = ST->element_begin(),
+ ste = ST->element_end(); stb != ste; ++stb) {
+ if (!containsPointerType(*stb)) {
+ continue;
+ }
+ return true;
+ }
+ break;
+ }
+ case Type::VectorTyID:
+ case Type::ArrayTyID:
+ return containsPointerType(dyn_cast<SequentialType>(Ty)->getElementType());
+ case Type::PointerTyID:
+ return true;
+ };
+ return false;
+}
+
+bool
+AMDILPeepholeOpt::dumpAllIntoArena(Function &F)
+{
+ bool dumpAll = false;
+ for (Function::const_arg_iterator cab = F.arg_begin(),
+ cae = F.arg_end(); cab != cae; ++cab) {
+ const Argument *arg = cab;
+ const PointerType *PT = dyn_cast<PointerType>(arg->getType());
+ if (!PT) {
+ continue;
+ }
+ Type *DereferencedType = PT->getElementType();
+ if (!dyn_cast<StructType>(DereferencedType)
+ ) {
+ continue;
+ }
+ if (!containsPointerType(DereferencedType)) {
+ continue;
+ }
+ // FIXME: Because a pointer inside of a struct/union may be aliased to
+ // another pointer we need to take the conservative approach and place all
+ // pointers into the arena until more advanced detection is implemented.
+ dumpAll = true;
+ }
+ return dumpAll;
+}
+void
+AMDILPeepholeOpt::doIsConstCallConversionIfNeeded()
+{
+ if (isConstVec.empty()) {
+ return;
+ }
+ for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) {
+ CallInst *CI = isConstVec[x];
+ Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
+ Type *aType = Type::getInt32Ty(*mCTX);
+ Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
+ : ConstantInt::get(aType, 0);
+ CI->replaceAllUsesWith(Val);
+ CI->eraseFromParent();
+ }
+ isConstVec.clear();
+}
+void
+AMDILPeepholeOpt::doAtomicConversionIfNeeded(Function &F)
+{
+ // Don't do anything if we don't have any atomic operations.
+ if (atomicFuncs.empty()) {
+ return;
+ }
+ // Change the function name for the atomic if it is required
+ uint32_t size = atomicFuncs.size();
+ for (uint32_t x = 0; x < size; ++x) {
+ atomicFuncs[x].first->setOperand(
+ atomicFuncs[x].first->getNumOperands()-1,
+ atomicFuncs[x].second);
+
+ }
+ mChanged = true;
+ if (mConvertAtomics) {
+ return;
+ }
+}
+
+bool
+AMDILPeepholeOpt::runOnFunction(Function &MF)
+{
+ mChanged = false;
+ mF = &MF;
+ mSTM = &TM.getSubtarget<AMDILSubtarget>();
+ if (mDebug) {
+ MF.dump();
+ }
+ mCTX = &MF.getType()->getContext();
+ mConvertAtomics = true;
+ safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
+ std::bind1st(std::mem_fun(&AMDILPeepholeOpt::instLevelOptimizations),
+ this));
+
+ doAtomicConversionIfNeeded(MF);
+ doIsConstCallConversionIfNeeded();
+
+ if (mDebug) {
+ MF.dump();
+ }
+ return mChanged;
+}
+
+bool
+AMDILPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb)
+{
+ Instruction *inst = (*bbb);
+ CallInst *CI = dyn_cast<CallInst>(inst);
+ if (!CI) {
+ return false;
+ }
+ if (isSigned24BitOps(CI)) {
+ expandSigned24BitOps(CI);
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+ if (propagateSamplerInst(CI)) {
+ return false;
+ }
+ if (expandBFI(CI) || expandBFM(CI)) {
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+ if (convertAccurateDivide(CI)) {
+ expandAccurateDivide(CI);
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+
+ StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName();
+ if (calleeName.startswith("__amdil_is_constant")) {
+ // If we do not have optimizations, then this
+ // cannot be properly evaluated, so we add the
+ // call instructions to a vector and process
+ // them at the end of processing, after the
+ // samplers have been correctly handled.
+ if (optLevel == CodeGenOpt::None) {
+ isConstVec.push_back(CI);
+ return false;
+ } else {
+ Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
+ Type *aType = Type::getInt32Ty(*mCTX);
+ Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
+ : ConstantInt::get(aType, 0);
+ CI->replaceAllUsesWith(Val);
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+ }
+
+ if (calleeName.equals("__amdil_is_asic_id_i32")) {
+ ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0));
+ Type *aType = Type::getInt32Ty(*mCTX);
+ Value *Val = CV;
+ if (Val) {
+ Val = ConstantInt::get(aType,
+ mSTM->device()->getDeviceFlag() & CV->getZExtValue());
+ } else {
+ Val = ConstantInt::get(aType, 0);
+ }
+ CI->replaceAllUsesWith(Val);
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+ Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1));
+ if (!F) {
+ return false;
+ }
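+ // Atomic builtins whose result is unused (and that are not exchanges) are
+ // recorded here so they can be renamed to their _noret variants later in
+ // doAtomicConversionIfNeeded().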
+ if (F->getName().startswith("__atom") && !CI->getNumUses()
+ && F->getName().find("_xchg") == StringRef::npos) {
+ std::string buffer(F->getName().str() + "_noret");
+ F = dyn_cast<Function>(
+ F->getParent()->getOrInsertFunction(buffer, F->getFunctionType()));
+ atomicFuncs.push_back(std::make_pair <CallInst*, Function*>(CI, F));
+ }
+
+ if (!mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)
+ && !mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV)) {
+ return false;
+ }
+ if (!mConvertAtomics) {
+ return false;
+ }
+ StringRef name = F->getName();
+ if (name.startswith("__atom") && name.find("_g") != StringRef::npos) {
+ mConvertAtomics = false;
+ }
+ return false;
+}
+
+bool
+AMDILPeepholeOpt::setupBitInsert(Instruction *base,
+ Instruction *&src,
+ Constant *&mask,
+ Constant *&shift)
+{
+ if (!base) {
+ if (mDebug) {
+ dbgs() << "Null pointer passed into function.\n";
+ }
+ return false;
+ }
+ bool andOp = false;
+ if (base->getOpcode() == Instruction::Shl) {
+ shift = dyn_cast<Constant>(base->getOperand(1));
+ } else if (base->getOpcode() == Instruction::And) {
+ mask = dyn_cast<Constant>(base->getOperand(1));
+ andOp = true;
+ } else {
+ if (mDebug) {
+ dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n";
+ }
+ // If the base is neither a Shl nor an And, we don't fit any of the patterns above.
+ return false;
+ }
+ src = dyn_cast<Instruction>(base->getOperand(0));
+ if (!src) {
+ if (mDebug) {
+ dbgs() << "Failed setup since the base operand is not an instruction!\n";
+ }
+ return false;
+ }
+ // If we find an 'and' operation, then we don't need to
+ // find the next operation as we already know the
+ // bits that are valid at this point.
+ if (andOp) {
+ return true;
+ }
+ if (src->getOpcode() == Instruction::Shl && !shift) {
+ shift = dyn_cast<Constant>(src->getOperand(1));
+ src = dyn_cast<Instruction>(src->getOperand(0));
+ } else if (src->getOpcode() == Instruction::And && !mask) {
+ mask = dyn_cast<Constant>(src->getOperand(1));
+ }
+ if (!mask && !shift) {
+ if (mDebug) {
+ dbgs() << "Failed setup since both mask and shift are NULL!\n";
+ }
+ // Did not find a constant mask or a shift.
+ return false;
+ }
+ return true;
+}
+bool
+AMDILPeepholeOpt::optimizeBitInsert(Instruction *inst)
+{
+ if (!inst) {
+ return false;
+ }
+ if (!inst->isBinaryOp()) {
+ return false;
+ }
+ if (inst->getOpcode() != Instruction::Or) {
+ return false;
+ }
+ if (optLevel == CodeGenOpt::None) {
+ return false;
+ }
+ // We want to do an optimization on a sequence of ops that in the end equals a
+ // single ISA instruction.
+ // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F)
+ // Some simplified versions of this pattern are as follows:
+ // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0
+ // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E
+ // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B
+ // (A & B) | (D << F) when (1 << F) >= B
+ // (A << C) | (D & E) when (1 << C) >= E
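+ // When the constraints checked below hold, the whole sequence is collapsed
+ // into a single __amdil_ubit_insert call taking (width, offset, src, base).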
+ if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ // The HD4XXX hardware doesn't support the ubit_insert instruction.
+ return false;
+ }
+ Type *aType = inst->getType();
+ bool isVector = aType->isVectorTy();
+ int numEle = 1;
+ // This optimization only works on 32bit integers.
+ if (aType->getScalarType()
+ != Type::getInt32Ty(inst->getContext())) {
+ return false;
+ }
+ if (isVector) {
+ const VectorType *VT = dyn_cast<VectorType>(aType);
+ numEle = VT->getNumElements();
+ // We currently cannot support more than 4 elements in an intrinsic and we
+ // cannot support Vec3 types.
+ if (numEle > 4 || numEle == 3) {
+ return false;
+ }
+ }
+ // TODO: Handle vectors.
+ if (isVector) {
+ if (mDebug) {
+ dbgs() << "!!! Vectors are not supported yet!\n";
+ }
+ return false;
+ }
+ Instruction *LHSSrc = NULL, *RHSSrc = NULL;
+ Constant *LHSMask = NULL, *RHSMask = NULL;
+ Constant *LHSShift = NULL, *RHSShift = NULL;
+ Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0));
+ Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1));
+ if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) {
+ if (mDebug) {
+ dbgs() << "Found an OR Operation that failed setup!\n";
+ inst->dump();
+ if (LHS) { LHS->dump(); }
+ if (LHSSrc) { LHSSrc->dump(); }
+ if (LHSMask) { LHSMask->dump(); }
+ if (LHSShift) { LHSShift->dump(); }
+ }
+ // There was an issue with the setup for BitInsert.
+ return false;
+ }
+ if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) {
+ if (mDebug) {
+ dbgs() << "Found an OR Operation that failed setup!\n";
+ inst->dump();
+ if (RHS) { RHS->dump(); }
+ if (RHSSrc) { RHSSrc->dump(); }
+ if (RHSMask) { RHSMask->dump(); }
+ if (RHSShift) { RHSShift->dump(); }
+ }
+ // There was an issue with the setup for BitInsert.
+ return false;
+ }
+ if (mDebug) {
+ dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n";
+ dbgs() << "Op: "; inst->dump();
+ dbgs() << "LHS: "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "LHS Src: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "LHS Mask: "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "RHS: "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "RHS Src: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "RHS Mask: "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; }
+ }
+ Constant *offset = NULL;
+ Constant *width = NULL;
+ int32_t lhsMaskVal = 0, rhsMaskVal = 0;
+ int32_t lhsShiftVal = 0, rhsShiftVal = 0;
+ int32_t lhsMaskWidth = 0, rhsMaskWidth = 0;
+ int32_t lhsMaskOffset = 0, rhsMaskOffset = 0;
+ lhsMaskVal = (int32_t)(LHSMask
+ ? dyn_cast<ConstantInt>(LHSMask)->getZExtValue() : 0);
+ rhsMaskVal = (int32_t)(RHSMask
+ ? dyn_cast<ConstantInt>(RHSMask)->getZExtValue() : 0);
+ lhsShiftVal = (int32_t)(LHSShift
+ ? dyn_cast<ConstantInt>(LHSShift)->getZExtValue() : 0);
+ rhsShiftVal = (int32_t)(RHSShift
+ ? dyn_cast<ConstantInt>(RHSShift)->getZExtValue() : 0);
+ lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal;
+ rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal;
+ lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal;
+ rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal;
+ // TODO: Handle the case of A & B | D & ~B (i.e. inverted masks).
+ if (mDebug) {
+ dbgs() << "Found pattern: \'((A" << (LHSMask ? " & B)" : ")");
+ dbgs() << (LHSShift ? " << C)" : ")") << " | ((D" ;
+ dbgs() << (RHSMask ? " & E)" : ")");
+ dbgs() << (RHSShift ? " << F)\'\n" : ")\'\n");
+ dbgs() << "A = LHSSrc\t\tD = RHSSrc \n";
+ dbgs() << "B = " << lhsMaskVal << "\t\tE = " << rhsMaskVal << "\n";
+ dbgs() << "C = " << lhsShiftVal << "\t\tF = " << rhsShiftVal << "\n";
+ dbgs() << "width(B) = " << lhsMaskWidth;
+ dbgs() << "\twidth(E) = " << rhsMaskWidth << "\n";
+ dbgs() << "offset(B) = " << lhsMaskOffset;
+ dbgs() << "\toffset(E) = " << rhsMaskOffset << "\n";
+ dbgs() << "Constraints: \n";
+ dbgs() << "\t(1) B ^ E == 0\n";
+ dbgs() << "\t(2-LHS) B is a mask\n";
+ dbgs() << "\t(2-LHS) E is a mask\n";
+ dbgs() << "\t(3-LHS) (offset(B)) >= (width(E) + offset(E))\n";
+ dbgs() << "\t(3-RHS) (offset(E)) >= (width(B) + offset(B))\n";
+ }
+ if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) {
+ if (mDebug) {
+ dbgs() << lhsMaskVal << " ^ " << rhsMaskVal;
+ dbgs() << " = " << (lhsMaskVal ^ rhsMaskVal) << "\n";
+ dbgs() << "Failed constraint 1!\n";
+ }
+ return false;
+ }
+ if (mDebug) {
+ dbgs() << "LHS = " << lhsMaskOffset << "";
+ dbgs() << " >= (" << rhsMaskWidth << " + " << rhsMaskOffset << ") = ";
+ dbgs() << (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset));
+ dbgs() << "\nRHS = " << rhsMaskOffset << "";
+ dbgs() << " >= (" << lhsMaskWidth << " + " << lhsMaskOffset << ") = ";
+ dbgs() << (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset));
+ dbgs() << "\n";
+ }
+ if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) {
+ offset = ConstantInt::get(aType, lhsMaskOffset, false);
+ width = ConstantInt::get(aType, lhsMaskWidth, false);
+ RHSSrc = RHS;
+ if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) {
+ if (mDebug) {
+ dbgs() << "Value is not a Mask: " << lhsMaskVal << "\n";
+ dbgs() << "Failed constraint 2!\n";
+ }
+ return false;
+ }
+ if (!LHSShift) {
+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+ "MaskShr", LHS);
+ } else if (lhsShiftVal != lhsMaskOffset) {
+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+ "MaskShr", LHS);
+ }
+ if (mDebug) {
+ dbgs() << "Optimizing LHS!\n";
+ }
+ } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) {
+ offset = ConstantInt::get(aType, rhsMaskOffset, false);
+ width = ConstantInt::get(aType, rhsMaskWidth, false);
+ LHSSrc = RHSSrc;
+ RHSSrc = LHS;
+ if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) {
+ if (mDebug) {
+ dbgs() << "Non-Mask: " << rhsMaskVal << "\n";
+ dbgs() << "Failed constraint 2!\n";
+ }
+ return false;
+ }
+ if (!RHSShift) {
+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+ "MaskShr", RHS);
+ } else if (rhsShiftVal != rhsMaskOffset) {
+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+ "MaskShr", RHS);
+ }
+ if (mDebug) {
+ dbgs() << "Optimizing RHS!\n";
+ }
+ } else {
+ if (mDebug) {
+ dbgs() << "Failed constraint 3!\n";
+ }
+ return false;
+ }
+ if (mDebug) {
+ dbgs() << "Width: "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; }
+ dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; }
+ dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; }
+ dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; }
+ }
+ if (!offset || !width) {
+ if (mDebug) {
+ dbgs() << "Either width or offset are NULL, failed detection!\n";
+ }
+ return false;
+ }
+ // Let's create the function signature.
+ std::vector<Type *> callTypes;
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ FunctionType *funcType = FunctionType::get(aType, callTypes, false);
+ std::string name = "__amdil_ubit_insert";
+ if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; }
+ Function *Func =
+ dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
+ getOrInsertFunction(llvm::StringRef(name), funcType));
+ Value *Operands[4] = {
+ width,
+ offset,
+ LHSSrc,
+ RHSSrc
+ };
+ CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt");
+ if (mDebug) {
+ dbgs() << "Old Inst: ";
+ inst->dump();
+ dbgs() << "New Inst: ";
+ CI->dump();
+ dbgs() << "\n\n";
+ }
+ CI->insertBefore(inst);
+ inst->replaceAllUsesWith(CI);
+ return true;
+}
+
+bool
+AMDILPeepholeOpt::optimizeBitExtract(Instruction *inst)
+{
+ if (!inst) {
+ return false;
+ }
+ if (!inst->isBinaryOp()) {
+ return false;
+ }
+ if (inst->getOpcode() != Instruction::And) {
+ return false;
+ }
+ if (optLevel == CodeGenOpt::None) {
+ return false;
+ }
+ // We want to do some simple optimizations on Shift right/And patterns. The
+ // basic optimization is to turn (A >> B) & C where A is a 32bit type, B is a
+ // value smaller than 32 and C is a mask. If C is a constant value, then the
+ // following transformation can occur. For signed integers, it turns into the
+ // function call dst = __amdil_ibit_extract(log2(C), B, A) For unsigned
+ // integers, it turns into the function call dst =
+ // __amdil_ubit_extract(log2(C), B, A) The function __amdil_[u|i]bit_extract
+ // can be found in Section 7.9 of the ATI IL spec of the stream SDK for
+ // Evergreen hardware.
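+ // For example, the scalar unsigned pattern (A >> 3) & 0x1F (a mask of
+ // width 5) is rewritten below as __amdil_ubit_extract_i32(5, 3, A).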
+ if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ // This does not work on HD4XXX hardware.
+ return false;
+ }
+ Type *aType = inst->getType();
+ bool isVector = aType->isVectorTy();
+ int numEle = 1;
+ // This only works on 32bit integers
+ if (aType->getScalarType()
+ != Type::getInt32Ty(inst->getContext())) {
+ return false;
+ }
+ if (isVector) {
+ const VectorType *VT = dyn_cast<VectorType>(aType);
+ numEle = VT->getNumElements();
+ // We currently cannot support more than 4 elements in an intrinsic and we
+ // cannot support Vec3 types.
+ if (numEle > 4 || numEle == 3) {
+ return false;
+ }
+ }
+ BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0));
+ // If the first operand is not a shift instruction, then we can return as it
+ // doesn't match this pattern.
+ if (!ShiftInst || !ShiftInst->isShift()) {
+ return false;
+ }
+ // If this is a shift left, then we don't match this pattern.
+ if (ShiftInst->getOpcode() == Instruction::Shl) {
+ return false;
+ }
+ bool isSigned = ShiftInst->isArithmeticShift();
+ Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1));
+ Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1));
+ // Let's make sure that the shift value and the AND mask are constant integers.
+ if (!AndMask || !ShrVal) {
+ return false;
+ }
+ Constant *newMaskConst;
+ Constant *shiftValConst;
+ if (isVector) {
+ // Handle the vector case
+ std::vector<Constant *> maskVals;
+ std::vector<Constant *> shiftVals;
+ ConstantVector *AndMaskVec = dyn_cast<ConstantVector>(AndMask);
+ ConstantVector *ShrValVec = dyn_cast<ConstantVector>(ShrVal);
+ Type *scalarType = AndMaskVec->getType()->getScalarType();
+ assert(AndMaskVec->getNumOperands() ==
+ ShrValVec->getNumOperands() && "cannot have a "
+ "combination where the number of elements to a "
+ "shift and an and are different!");
+ for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) {
+ ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getOperand(x));
+ ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getOperand(x));
+ if (!AndCI || !ShiftIC) {
+ return false;
+ }
+ uint32_t maskVal = (uint32_t)AndCI->getZExtValue();
+ if (!isMask_32(maskVal)) {
+ return false;
+ }
+ maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
+ uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue();
+ // If the mask or shiftval is greater than the bitcount, then break out.
+ if (maskVal >= 32 || shiftVal >= 32) {
+ return false;
+ }
+ // If the mask val is greater than the number of original bits left
+ // then this optimization is invalid.
+ if (maskVal > (32 - shiftVal)) {
+ return false;
+ }
+ maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned));
+ shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned));
+ }
+ newMaskConst = ConstantVector::get(maskVals);
+ shiftValConst = ConstantVector::get(shiftVals);
+ } else {
+ // Handle the scalar case
+ uint32_t maskVal = (uint32_t)dyn_cast<ConstantInt>(AndMask)->getZExtValue();
+ // This must be a mask value where all lower bits are set to 1 and then any
+ // bit higher is set to 0.
+ if (!isMask_32(maskVal)) {
+ return false;
+ }
+ maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
+ // Count the number of bits set in the mask; this is the width of the
+ // resulting bit set that is extracted from the source value.
+ uint32_t shiftVal = (uint32_t)dyn_cast<ConstantInt>(ShrVal)->getZExtValue();
+ // If the mask or shift val is greater than the bitcount, then break out.
+ if (maskVal >= 32 || shiftVal >= 32) {
+ return false;
+ }
+ // If the mask val is greater than the number of original bits left then
+ // this optimization is invalid.
+ if (maskVal > (32 - shiftVal)) {
+ return false;
+ }
+ newMaskConst = ConstantInt::get(aType, maskVal, isSigned);
+ shiftValConst = ConstantInt::get(aType, shiftVal, isSigned);
+ }
+ // Let's create the function signature.
+ std::vector<Type *> callTypes;
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ FunctionType *funcType = FunctionType::get(aType, callTypes, false);
+ std::string name = "__amdil_ubit_extract";
+ if (isVector) {
+ name += "_v" + itostr(numEle) + "i32";
+ } else {
+ name += "_i32";
+ }
+ // Let's create the function.
+ Function *Func =
+ dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
+ getOrInsertFunction(llvm::StringRef(name), funcType));
+ Value *Operands[3] = {
+ newMaskConst,
+ shiftValConst,
+ ShiftInst->getOperand(0)
+ };
+ // Let's create the call with the operands.
+ CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
+ CI->insertBefore(inst);
+ inst->replaceAllUsesWith(CI);
+ return true;
+}
+
+bool
+AMDILPeepholeOpt::expandBFI(CallInst *CI)
+{
+ if (!CI || mSTM->calVersion() <= CAL_VERSION_SC_150) {
+ return false;
+ }
+ Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
+ if (!LHS->getName().startswith("__amdil_bfi")) {
+ return false;
+ }
+ Type* type = CI->getOperand(0)->getType();
+ Constant *negOneConst = NULL;
+ if (type->isVectorTy()) {
+ std::vector<Constant *> negOneVals;
+ negOneConst = ConstantInt::get(CI->getContext(),
+ APInt(32, StringRef("-1"), 10));
+ for (size_t x = 0,
+ y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
+ negOneVals.push_back(negOneConst);
+ }
+ negOneConst = ConstantVector::get(negOneVals);
+ } else {
+ negOneConst = ConstantInt::get(CI->getContext(),
+ APInt(32, StringRef("-1"), 10));
+ }
+ // __amdil_bfi => (A & B) | (~A & C)
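+ // In other words a bitwise select: each result bit comes from B where the
+ // corresponding bit of A is 1, and from C where it is 0.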
+ BinaryOperator *lhs =
+ BinaryOperator::Create(Instruction::And, CI->getOperand(0),
+ CI->getOperand(1), "bfi_and", CI);
+ BinaryOperator *rhs =
+ BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst,
+ "bfi_not", CI);
+ rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2),
+ "bfi_and", CI);
+ lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI);
+ CI->replaceAllUsesWith(lhs);
+ return true;
+}
+
+bool
+AMDILPeepholeOpt::expandBFM(CallInst *CI)
+{
+ if (!CI || mSTM->calVersion() <= CAL_VERSION_SC_150) {
+ return false;
+ }
+ Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
+ if (!LHS->getName().startswith("__amdil_bfm")) {
+ return false;
+ }
+ // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f)
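+ // For example, bfm(8, 4) yields ((1 << 8) - 1) << 4 = 0xFF0, i.e. a mask
+ // of width 8 starting at bit 4.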
+ Constant *newMaskConst = NULL;
+ Constant *newShiftConst = NULL;
+ Type* type = CI->getOperand(0)->getType();
+ if (type->isVectorTy()) {
+ std::vector<Constant*> newMaskVals, newShiftVals;
+ newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
+ newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
+ for (size_t x = 0,
+ y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
+ newMaskVals.push_back(newMaskConst);
+ newShiftVals.push_back(newShiftConst);
+ }
+ newMaskConst = ConstantVector::get(newMaskVals);
+ newShiftConst = ConstantVector::get(newShiftVals);
+ } else {
+ newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
+ newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
+ }
+ BinaryOperator *lhs =
+ BinaryOperator::Create(Instruction::And, CI->getOperand(0),
+ newMaskConst, "bfm_mask", CI);
+ lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst,
+ lhs, "bfm_shl", CI);
+ lhs = BinaryOperator::Create(Instruction::Sub, lhs,
+ newShiftConst, "bfm_sub", CI);
+ BinaryOperator *rhs =
+ BinaryOperator::Create(Instruction::And, CI->getOperand(1),
+ newMaskConst, "bfm_mask", CI);
+ lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI);
+ CI->replaceAllUsesWith(lhs);
+ return true;
+}
+
+bool
+AMDILPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb)
+{
+ Instruction *inst = (*bbb);
+ if (optimizeCallInst(bbb)) {
+ return true;
+ }
+ if (optimizeBitExtract(inst)) {
+ return false;
+ }
+ if (optimizeBitInsert(inst)) {
+ return false;
+ }
+ if (correctMisalignedMemOp(inst)) {
+ return false;
+ }
+ return false;
+}
+bool
+AMDILPeepholeOpt::correctMisalignedMemOp(Instruction *inst)
+{
+ LoadInst *linst = dyn_cast<LoadInst>(inst);
+ StoreInst *sinst = dyn_cast<StoreInst>(inst);
+ unsigned alignment;
+ Type* Ty = inst->getType();
+ if (linst) {
+ alignment = linst->getAlignment();
+ Ty = inst->getType();
+ } else if (sinst) {
+ alignment = sinst->getAlignment();
+ Ty = sinst->getValueOperand()->getType();
+ } else {
+ return false;
+ }
+ unsigned size = getTypeSize(Ty);
+ if (size == alignment || size < alignment) {
+ return false;
+ }
+ if (!Ty->isStructTy()) {
+ return false;
+ }
+ if (alignment < 4) {
+ if (linst) {
+ linst->setAlignment(0);
+ return true;
+ } else if (sinst) {
+ sinst->setAlignment(0);
+ return true;
+ }
+ }
+ return false;
+}
+bool
+AMDILPeepholeOpt::isSigned24BitOps(CallInst *CI)
+{
+ if (!CI) {
+ return false;
+ }
+ Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
+ std::string namePrefix = LHS->getName().substr(0, 14);
+ if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24"
+ && namePrefix != "__amdil__imul24_high") {
+ return false;
+ }
+ if (mSTM->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
+ return false;
+ }
+ return true;
+}
+
+void
+AMDILPeepholeOpt::expandSigned24BitOps(CallInst *CI)
+{
+ assert(isSigned24BitOps(CI) && "Must be a "
+ "signed 24 bit operation to call this function!");
+ Value *LHS = CI->getOperand(CI->getNumOperands()-1);
+ // On 7XX and 8XX we do not have signed 24bit, so we need to
+ // expand it to the following:
+ // imul24 turns into 32bit imul
+ // imad24 turns into 32bit imad
+ // imul24_high turns into 32bit imulhigh
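+ // For example, __amdil_imad24(a, b, c) is rewritten below as a call to
+ // __amdil_imad_i32(a, b, c), and __amdil_imul24(a, b) becomes a plain
+ // 32 bit mul instruction.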
+ if (LHS->getName().substr(0, 14) == "__amdil_imad24") {
+ Type *aType = CI->getOperand(0)->getType();
+ bool isVector = aType->isVectorTy();
+ int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
+ std::vector<Type*> callTypes;
+ callTypes.push_back(CI->getOperand(0)->getType());
+ callTypes.push_back(CI->getOperand(1)->getType());
+ callTypes.push_back(CI->getOperand(2)->getType());
+ FunctionType *funcType =
+ FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
+ std::string name = "__amdil_imad";
+ if (isVector) {
+ name += "_v" + itostr(numEle) + "i32";
+ } else {
+ name += "_i32";
+ }
+ Function *Func = dyn_cast<Function>(
+ CI->getParent()->getParent()->getParent()->
+ getOrInsertFunction(llvm::StringRef(name), funcType));
+ Value *Operands[3] = {
+ CI->getOperand(0),
+ CI->getOperand(1),
+ CI->getOperand(2)
+ };
+ CallInst *nCI = CallInst::Create(Func, Operands, "imad24");
+ nCI->insertBefore(CI);
+ CI->replaceAllUsesWith(nCI);
+ } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") {
+ BinaryOperator *mulOp =
+ BinaryOperator::Create(Instruction::Mul, CI->getOperand(0),
+ CI->getOperand(1), "imul24", CI);
+ CI->replaceAllUsesWith(mulOp);
+ } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") {
+ Type *aType = CI->getOperand(0)->getType();
+
+ bool isVector = aType->isVectorTy();
+ int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
+ std::vector<Type*> callTypes;
+ callTypes.push_back(CI->getOperand(0)->getType());
+ callTypes.push_back(CI->getOperand(1)->getType());
+ FunctionType *funcType =
+ FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
+ std::string name = "__amdil_imul_high";
+ if (isVector) {
+ name += "_v" + itostr(numEle) + "i32";
+ } else {
+ name += "_i32";
+ }
+ Function *Func = dyn_cast<Function>(
+ CI->getParent()->getParent()->getParent()->
+ getOrInsertFunction(llvm::StringRef(name), funcType));
+ Value *Operands[2] = {
+ CI->getOperand(0),
+ CI->getOperand(1)
+ };
+ CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high");
+ nCI->insertBefore(CI);
+ CI->replaceAllUsesWith(nCI);
+ }
+}
+
+bool
+AMDILPeepholeOpt::isRWGLocalOpt(CallInst *CI)
+{
+ return (CI != NULL
+ && CI->getOperand(CI->getNumOperands() - 1)->getName()
+ == "__amdil_get_local_size_int");
+}
+
+bool
+AMDILPeepholeOpt::convertAccurateDivide(CallInst *CI)
+{
+ if (!CI) {
+ return false;
+ }
+ if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD6XXX
+ && (mSTM->getDeviceName() == "cayman")) {
+ return false;
+ }
+ return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20)
+ == "__amdil_improved_div";
+}
+
+void
+AMDILPeepholeOpt::expandAccurateDivide(CallInst *CI)
+{
+ assert(convertAccurateDivide(CI)
+ && "expanding accurate divide can only happen if it is expandable!");
+ BinaryOperator *divOp =
+ BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0),
+ CI->getOperand(1), "fdiv32", CI);
+ CI->replaceAllUsesWith(divOp);
+}
+
+bool
+AMDILPeepholeOpt::propagateSamplerInst(CallInst *CI)
+{
+ if (optLevel != CodeGenOpt::None) {
+ return false;
+ }
+
+ if (!CI) {
+ return false;
+ }
+
+ unsigned funcNameIdx = CI->getNumOperands() - 1;
+ StringRef calleeName = CI->getOperand(funcNameIdx)->getName();
+ if (calleeName != "__amdil_image2d_read_norm"
+ && calleeName != "__amdil_image2d_read_unnorm"
+ && calleeName != "__amdil_image3d_read_norm"
+ && calleeName != "__amdil_image3d_read_unnorm") {
+ return false;
+ }
+
+ unsigned samplerIdx = 1;
+ Value *sampler = CI->getOperand(samplerIdx);
+ LoadInst *lInst = dyn_cast<LoadInst>(sampler);
+ if (!lInst) {
+ return false;
+ }
+
+ if (lInst->getPointerAddressSpace() != AMDILAS::PRIVATE_ADDRESS) {
+ return false;
+ }
+
+ GlobalVariable *gv = dyn_cast<GlobalVariable>(lInst->getPointerOperand());
+ // If we are loading from what is not a global value, then we
+ // fail and return.
+ if (!gv) {
+ return false;
+ }
+
+ // If the global has no initializer, or its initializer is not a
+ // 32bit integer, we fail.
+ if (!gv->hasInitializer()
+ || !gv->getInitializer()->getType()->isIntegerTy(32)) {
+ return false;
+ }
+
+ // Now that we have the global variable initializer, let's replace
+ // all uses of the load instruction with the samplerVal and
+ // reparse the __amdil_is_constant() function.
+ Constant *samplerVal = gv->getInitializer();
+ lInst->replaceAllUsesWith(samplerVal);
+ return true;
+}
+
+bool
+AMDILPeepholeOpt::doInitialization(Module &M)
+{
+ return false;
+}
+
+bool
+AMDILPeepholeOpt::doFinalization(Module &M)
+{
+ return false;
+}
+
+void
+AMDILPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const
+{
+ AU.addRequired<MachineFunctionAnalysis>();
+ FunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+}
diff --git a/lib/Target/AMDIL/R600CodeEmitter.cpp b/lib/Target/AMDIL/R600CodeEmitter.cpp
new file mode 100644
index 00000000000..8faf0deb8c5
--- /dev/null
+++ b/lib/Target/AMDIL/R600CodeEmitter.cpp
@@ -0,0 +1,749 @@
+//===-- R600CodeEmitter.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUUtil.h"
+#include "AMDILCodeEmitter.h"
+#include "AMDILInstrInfo.h"
+#include "AMDILUtilityFunctions.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <stdio.h>
+
+#define SRC_BYTE_COUNT 11
+#define DST_BYTE_COUNT 5
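+// These counts match the byte sequences written by emitSrc() and emitDst():
+// 2 (src select) + 1 (chan) + 1 (neg) + 1 (abs) + 1 (rel) + 1 (kc_bank) +
+// 4 (literal) = 11 bytes per source, and 5 bytes per destination.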
+
+using namespace llvm;
+
+namespace {
+
+ class R600CodeEmitter : public MachineFunctionPass, public AMDILCodeEmitter {
+
+ private:
+
+ static char ID;
+ formatted_raw_ostream &_OS;
+ const TargetMachine * TM;
+ const MachineRegisterInfo * MRI;
+ const R600RegisterInfo * TRI;
+ bool evergreenEncoding;
+
+ bool isReduction;
+ unsigned reductionElement;
+ bool isLast;
+
+ unsigned section_start;
+
+ public:
+
+ R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
+ _OS(OS), TM(NULL), evergreenEncoding(false), isReduction(false),
+ isLast(true) { }
+
+ const char *getPassName() const { return "AMDGPU Machine Code Emitter"; }
+
+ bool runOnMachineFunction(MachineFunction &MF);
+ virtual uint64_t getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+
+ private:
+
+ void emitALUInstr(MachineInstr &MI);
+ void emitSrc(const MachineOperand & MO);
+ void emitDst(const MachineOperand & MO);
+ void emitALU(MachineInstr &MI, unsigned numSrc);
+ void emitTexInstr(MachineInstr &MI);
+ void emitFCInstr(MachineInstr &MI);
+
+ unsigned int getHWInst(const MachineInstr &MI);
+
+ void emitNullBytes(unsigned int byteCount);
+
+ void emitByte(unsigned int byte);
+
+ void emitTwoBytes(uint32_t bytes);
+
+ void emit(uint32_t value);
+ void emit(uint64_t value);
+
+ unsigned getHWReg(unsigned regNo) const;
+
+ unsigned getElement(unsigned regNo);
+
+};
+
+} /* End anonymous namespace */
+
+#define WRITE_MASK_X 0x1
+#define WRITE_MASK_Y 0x2
+#define WRITE_MASK_Z 0x4
+#define WRITE_MASK_W 0x8
+
+enum RegElement {
+ ELEMENT_X = 0,
+ ELEMENT_Y,
+ ELEMENT_Z,
+ ELEMENT_W
+};
+
+enum InstrTypes {
+ INSTR_ALU = 0,
+ INSTR_TEX,
+ INSTR_FC,
+ INSTR_NATIVE,
+ INSTR_VTX
+};
+
+enum FCInstr {
+ FC_IF = 0,
+ FC_ELSE,
+ FC_ENDIF,
+ FC_BGNLOOP,
+ FC_ENDLOOP,
+ FC_BREAK,
+ FC_BREAK_NZ_INT,
+ FC_CONTINUE,
+ FC_BREAK_Z_INT
+};
+
+enum TextureTypes {
+ TEXTURE_1D = 1,
+ TEXTURE_2D,
+ TEXTURE_3D,
+ TEXTURE_CUBE,
+ TEXTURE_RECT,
+ TEXTURE_SHADOW1D,
+ TEXTURE_SHADOW2D,
+ TEXTURE_SHADOWRECT,
+ TEXTURE_1D_ARRAY,
+ TEXTURE_2D_ARRAY,
+ TEXTURE_SHADOW1D_ARRAY,
+ TEXTURE_SHADOW2D_ARRAY
+};
+
+char R600CodeEmitter::ID = 0;
+
+FunctionPass *llvm::createR600CodeEmitterPass(formatted_raw_ostream &OS) {
+ return new R600CodeEmitter(OS);
+}
+
+bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
+
+ TM = &MF.getTarget();
+ MRI = &MF.getRegInfo();
+ TRI = static_cast<const R600RegisterInfo *>(TM->getRegisterInfo());
+ const AMDILSubtarget &STM = TM->getSubtarget<AMDILSubtarget>();
+ std::string gpu = STM.getDeviceName();
+ if (!gpu.compare(0,3, "rv7")) {
+ evergreenEncoding = false;
+ } else {
+ evergreenEncoding = true;
+ }
+ const AMDGPUTargetMachine *amdtm =
+ static_cast<const AMDGPUTargetMachine *>(&MF.getTarget());
+
+ if (amdtm->shouldDumpCode()) {
+ MF.dump();
+ }
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+ if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg() && MI.getOperand(0).isDead()) {
+ continue;
+ }
+ if (isTexOp(MI.getOpcode())) {
+ emitTexInstr(MI);
+ } else if (isFCOp(MI.getOpcode())){
+ emitFCInstr(MI);
+ } else if (isReductionOp(MI.getOpcode())) {
+ isReduction = true;
+ isLast = false;
+ for (reductionElement = 0; reductionElement < 4; reductionElement++) {
+ isLast = (reductionElement == 3);
+ emitALUInstr(MI);
+ }
+ isReduction = false;
+ } else if (MI.getOpcode() == AMDIL::RETURN ||
+ MI.getOpcode() == AMDIL::BUNDLE ||
+ MI.getOpcode() == AMDIL::KILL) {
+ continue;
+ } else {
+ switch(MI.getOpcode()) {
+ case AMDIL::RAT_WRITE_CACHELESS_eg:
+ {
+ /* XXX: Support for autoencoding 64-bit instructions was added
+ * in LLVM 3.1. Until we drop support for 3.0, we will use Magic
+ * numbers for the high bits. */
+ uint64_t high = 0x95c0100000000000;
+ uint64_t inst = getBinaryCodeForInstr(MI);
+ inst |= high;
+ /* Set End Of Program bit */
+ /* XXX: Need better check of end of program. EOP should be
+ * encoded in one of the operands of the MI, and it should be
+ * set in a prior pass. */
+ MachineBasicBlock::iterator NextI = llvm::next(I);
+ MachineInstr &NextMI = *NextI;
+ if (NextMI.getOpcode() == AMDIL::RETURN) {
+ inst |= (((uint64_t)1) << 53);
+ }
+ emitByte(INSTR_NATIVE);
+ emit(inst);
+ break;
+ }
+ case AMDIL::VTX_READ_eg:
+ {
+ emitByte(INSTR_VTX);
+ /* inst */
+ emitByte(0);
+
+ /* fetch_type */
+ emitByte(2);
+
+ /* buffer_id */
+ emitByte(MI.getOperand(2).getImm());
+
+ /* src_gpr */
+ emitByte(getHWReg(MI.getOperand(1).getReg()));
+
+ /* src_sel_x */
+ emitByte(TRI->getHWRegChan(MI.getOperand(1).getReg()));
+
+ /* mega_fetch_count */
+ emitByte(3);
+
+ /* dst_gpr */
+ emitByte(getHWReg(MI.getOperand(0).getReg()));
+
+ /* dst_sel_x */
+ emitByte(0);
+
+ /* dst_sel_y */
+ emitByte(7);
+
+ /* dst_sel_z */
+ emitByte(7);
+
+ /* dst_sel_w */
+ emitByte(7);
+
+ /* use_const_fields */
+ emitByte(1);
+
+ /* data_format */
+ emitByte(0);
+
+ /* num_format_all */
+ emitByte(0);
+
+ /* format_comp_all */
+ emitByte(0);
+
+ /* srf_mode_all */
+ emitByte(0);
+
+ /* offset */
+ emitByte(0);
+
+ /* endian */
+ emitByte(0);
+ break;
+ }
+
+ default:
+ emitALUInstr(MI);
+ break;
+ }
+ }
+ }
+ }
+ return false;
+}
+
+void R600CodeEmitter::emitALUInstr(MachineInstr &MI)
+{
+
+ unsigned numOperands = MI.getNumExplicitOperands();
+
+ /* Some instructions are just placeholder instructions that represent
+ * operations that the GPU does automatically. They should be ignored. */
+ if (isPlaceHolderOpcode(MI.getOpcode())) {
+ return;
+ }
+
+ /* We need to handle some opcodes differently */
+ switch (MI.getOpcode()) {
+ default: break;
+
+ /* XXX: Temp Hack */
+ case AMDIL::STORE_OUTPUT:
+ numOperands = 2;
+ break;
+ }
+
+ /* XXX Check if instruction writes a result */
+ if (numOperands < 1) {
+ return;
+ }
+ const MachineOperand dstOp = MI.getOperand(0);
+
+ /* Emit instruction type */
+ emitByte(0);
+
+ unsigned int opIndex;
+ for (opIndex = 1; opIndex < numOperands; opIndex++) {
+ /* Literal constants are always stored as the last operand. */
+ if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) {
+ break;
+ }
+ emitSrc(MI.getOperand(opIndex));
+ }
+
+ /* Emit zeros for unused sources */
+ for ( ; opIndex < 4; opIndex++) {
+ emitNullBytes(SRC_BYTE_COUNT);
+ }
+
+ emitDst(dstOp);
+
+ emitALU(MI, numOperands - 1);
+}
+
+void R600CodeEmitter::emitSrc(const MachineOperand & MO)
+{
+ uint32_t value = 0;
+ /* Emit the source select (2 bytes). For GPRs, this is the register index.
+ * For other potential instruction operands (e.g. constant registers), the
+ * value of the source select is defined in the r600isa docs. */
+ if (MO.isReg()) {
+ unsigned reg = MO.getReg();
+ emitTwoBytes(getHWReg(reg));
+ if (reg == AMDIL::ALU_LITERAL_X) {
+ const MachineInstr * parent = MO.getParent();
+ unsigned immOpIndex = parent->getNumExplicitOperands() - 1;
+ MachineOperand immOp = parent->getOperand(immOpIndex);
+ if (immOp.isFPImm()) {
+ value = immOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue();
+ } else {
+ assert(immOp.isImm());
+ value = immOp.getImm();
+ }
+ }
+ } else {
+ /* XXX: Handle other operand types. */
+ emitTwoBytes(0);
+ }
+
+ /* Emit the source channel (1 byte) */
+ if (isReduction) {
+ emitByte(reductionElement);
+ } else if (MO.isReg()) {
+ emitByte(TRI->getHWRegChan(MO.getReg()));
+ } else {
+ emitByte(0);
+ }
+
+ /* XXX: Emit isNegated (1 byte) */
+ if ((!(MO.getTargetFlags() & MO_FLAG_ABS))
+ && (MO.getTargetFlags() & MO_FLAG_NEG ||
+ (MO.isReg() &&
+ (MO.getReg() == AMDIL::NEG_ONE || MO.getReg() == AMDIL::NEG_HALF)))){
+ emitByte(1);
+ } else {
+ emitByte(0);
+ }
+
+ /* Emit isAbsolute (1 byte) */
+ if (MO.getTargetFlags() & MO_FLAG_ABS) {
+ emitByte(1);
+ } else {
+ emitByte(0);
+ }
+
+ /* XXX: Emit relative addressing mode (1 byte) */
+ emitByte(0);
+
+ /* Emit kc_bank, This will be adjusted later by r600_asm */
+ emitByte(0);
+
+ /* Emit the literal value, if applicable (4 bytes). */
+ emit(value);
+
+}
+
+void R600CodeEmitter::emitDst(const MachineOperand & MO)
+{
+ if (MO.isReg()) {
+ /* Emit the destination register index (1 byte) */
+ emitByte(getHWReg(MO.getReg()));
+
+ /* Emit the element of the destination register (1 byte)*/
+ if (isReduction) {
+ emitByte(reductionElement);
+ } else {
+ emitByte(TRI->getHWRegChan(MO.getReg()));
+ }
+
+ /* Emit isClamped (1 byte) */
+ if (MO.getTargetFlags() & MO_FLAG_CLAMP) {
+ emitByte(1);
+ } else {
+ emitByte(0);
+ }
+
+ /* Emit writemask (1 byte). */
+ if ((isReduction && reductionElement != TRI->getHWRegChan(MO.getReg()))
+ || MO.getTargetFlags() & MO_FLAG_MASK) {
+ emitByte(0);
+ } else {
+ emitByte(1);
+ }
+
+ /* XXX: Emit relative addressing mode */
+ emitByte(0);
+ } else {
+ /* XXX: Handle other operand types. Are there any for destination regs? */
+ emitNullBytes(DST_BYTE_COUNT);
+ }
+}
+
+void R600CodeEmitter::emitALU(MachineInstr &MI, unsigned numSrc)
+{
+ /* Emit the instruction (2 bytes) */
+ emitTwoBytes(getHWInst(MI));
+
+ /* Emit isLast (for this instruction group) (1 byte) */
+ if (isLast) {
+ emitByte(1);
+ } else {
+ emitByte(0);
+ }
+ /* Emit isOp3 (1 byte) */
+ if (numSrc == 3) {
+ emitByte(1);
+ } else {
+ emitByte(0);
+ }
+
+ /* XXX: Emit predicate (1 byte) */
+ emitByte(0);
+
+ /* XXX: Emit bank swizzle. (1 byte) Do we need this? It looks like
+ * r600_asm.c sets it. */
+ emitByte(0);
+
+ /* XXX: Emit bank_swizzle_force (1 byte) Not sure what this is for. */
+ emitByte(0);
+
+ /* XXX: Emit OMOD (1 byte) Not implemented. */
+ emitByte(0);
+
+ /* XXX: Emit index_mode. I think this is for indirect addressing, so we
+ * don't need to worry about it. */
+ emitByte(0);
+}
+
+void R600CodeEmitter::emitTexInstr(MachineInstr &MI)
+{
+
+ int64_t sampler = MI.getOperand(2).getImm();
+ int64_t textureType = MI.getOperand(3).getImm();
+ unsigned opcode = MI.getOpcode();
+ unsigned srcSelect[4] = {0, 1, 2, 3};
+
+ /* Emit instruction type */
+ emitByte(1);
+
+ /* Emit instruction */
+ emitByte(getHWInst(MI));
+
+ /* XXX: Emit resource id. r600_shader.c uses sampler + 1. Why? */
+ emitByte(sampler + 1 + 1);
+
+ /* Emit source register */
+ emitByte(getHWReg(MI.getOperand(1).getReg()));
+
+ /* XXX: Emit src isRelativeAddress */
+ emitByte(0);
+
+ /* Emit destination register */
+ emitByte(getHWReg(MI.getOperand(0).getReg()));
+
+ /* XXX: Emit dst isRelativeAddress */
+ emitByte(0);
+
+ /* XXX: Emit dst select */
+ emitByte(0); /* X */
+ emitByte(1); /* Y */
+ emitByte(2); /* Z */
+ emitByte(3); /* W */
+
+ /* XXX: Emit lod bias */
+ emitByte(0);
+
+ /* XXX: Emit coord types */
+ unsigned coordType[4] = {1, 1, 1, 1};
+
+ if (textureType == TEXTURE_RECT
+ || textureType == TEXTURE_SHADOWRECT) {
+ coordType[ELEMENT_X] = 0;
+ coordType[ELEMENT_Y] = 0;
+ }
+
+ if (textureType == TEXTURE_1D_ARRAY
+ || textureType == TEXTURE_SHADOW1D_ARRAY) {
+ if (opcode == AMDIL::TEX_SAMPLE_C_L || opcode == AMDIL::TEX_SAMPLE_C_LB) {
+ coordType[ELEMENT_Y] = 0;
+ } else {
+ coordType[ELEMENT_Z] = 0;
+ srcSelect[ELEMENT_Z] = ELEMENT_Y;
+ }
+ } else if (textureType == TEXTURE_2D_ARRAY
+ || textureType == TEXTURE_SHADOW2D_ARRAY) {
+ coordType[ELEMENT_Z] = 0;
+ }
+
+ for (unsigned i = 0; i < 4; i++) {
+ emitByte(coordType[i]);
+ }
+
+ /* XXX: Emit offsets */
+ emitByte(0); /* X */
+ emitByte(0); /* Y */
+ emitByte(0); /* Z */
+ /* There is no OFFSET_W */
+
+ /* Emit sampler id */
+ emitByte(sampler);
+
+ /* XXX: Emit source select */
+ if ((textureType == TEXTURE_SHADOW1D
+ || textureType == TEXTURE_SHADOW2D
+ || textureType == TEXTURE_SHADOWRECT
+ || textureType == TEXTURE_SHADOW1D_ARRAY)
+ && opcode != AMDIL::TEX_SAMPLE_C_L
+ && opcode != AMDIL::TEX_SAMPLE_C_LB) {
+ srcSelect[ELEMENT_W] = ELEMENT_Z;
+ }
+
+ for (unsigned i = 0; i < 4; i++) {
+ emitByte(srcSelect[i]);
+ }
+}
+
+void R600CodeEmitter::emitFCInstr(MachineInstr &MI)
+{
+ /* Emit instruction type */
+ emitByte(INSTR_FC);
+
+ /* Emit SRC */
+ unsigned numOperands = MI.getNumOperands();
+ if (numOperands > 0) {
+ assert(numOperands == 1);
+ emitSrc(MI.getOperand(0));
+ } else {
+ emitNullBytes(SRC_BYTE_COUNT);
+ }
+
+ /* Emit FC Instruction */
+ enum FCInstr instr;
+ switch (MI.getOpcode()) {
+ case AMDIL::BREAK_LOGICALZ_f32:
+ instr = FC_BREAK;
+ break;
+ case AMDIL::BREAK_LOGICALNZ_i32:
+ instr = FC_BREAK_NZ_INT;
+ break;
+ case AMDIL::BREAK_LOGICALZ_i32:
+ instr = FC_BREAK_Z_INT;
+ break;
+ case AMDIL::CONTINUE_LOGICALNZ_f32:
+ instr = FC_CONTINUE;
+ break;
+ /* XXX: This assumes that all IFs will be if (x != 0). If we add
+ * optimizations this might not be the case */
+ case AMDIL::IF_LOGICALNZ_f32:
+ case AMDIL::IF_LOGICALNZ_i32:
+ instr = FC_IF;
+ break;
+ case AMDIL::IF_LOGICALZ_f32:
+ abort();
+ break;
+ case AMDIL::ELSE:
+ instr = FC_ELSE;
+ break;
+ case AMDIL::ENDIF:
+ instr = FC_ENDIF;
+ break;
+ case AMDIL::ENDLOOP:
+ instr = FC_ENDLOOP;
+ break;
+ case AMDIL::WHILELOOP:
+ instr = FC_BGNLOOP;
+ break;
+ default:
+ abort();
+ break;
+ }
+ emitByte(instr);
+}
+
+#define INSTR_FLOAT2_V(inst, hw) \
+ case AMDIL:: inst##_v4f32: \
+ case AMDIL:: inst##_v2f32: return HW_INST2(hw);
+
+#define INSTR_FLOAT2_S(inst, hw) \
+ case AMDIL:: inst##_f32: return HW_INST2(hw);
+
+#define INSTR_FLOAT2(inst, hw) \
+ INSTR_FLOAT2_V(inst, hw) \
+ INSTR_FLOAT2_S(inst, hw)
+
+unsigned int R600CodeEmitter::getHWInst(const MachineInstr &MI)
+{
+
+ /* XXX: Lower these to MOV before the code emitter. */
+ switch (MI.getOpcode()) {
+ case AMDIL::STORE_OUTPUT:
+ case AMDIL::VCREATE_v4i32:
+ case AMDIL::LOADCONST_i32:
+ case AMDIL::LOADCONST_f32:
+ case AMDIL::MOVE_v4i32:
+ /* Instructions to reinterpret bits as ... */
+ case AMDIL::IL_ASINT_f32:
+ case AMDIL::IL_ASINT_i32:
+ case AMDIL::IL_ASFLOAT_f32:
+ case AMDIL::IL_ASFLOAT_i32:
+ return 0x19;
+
+ default:
+ return getBinaryCodeForInstr(MI);
+ }
+}
+
+void R600CodeEmitter::emitNullBytes(unsigned int byteCount)
+{
+ for (unsigned int i = 0; i < byteCount; i++) {
+ emitByte(0);
+ }
+}
+
+void R600CodeEmitter::emitByte(unsigned int byte)
+{
+ _OS.write((uint8_t) (byte & 0xff));
+}
+
+void R600CodeEmitter::emitTwoBytes(unsigned int bytes)
+{
+ _OS.write((uint8_t) (bytes & 0xff));
+ _OS.write((uint8_t) ((bytes >> 8) & 0xff));
+}
+
+void R600CodeEmitter::emit(uint32_t value)
+{
+ for (unsigned i = 0; i < 4; i++) {
+ _OS.write((uint8_t) ((value >> (8 * i)) & 0xff));
+ }
+}
+
+void R600CodeEmitter::emit(uint64_t value)
+{
+ for (unsigned i = 0; i < 8; i++) {
+ emitByte((value >> (8 * i)) & 0xff);
+ }
+}
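
All multi-byte fields above are written least-significant byte first. A minimal standalone check of that convention:

#include <cassert>
#include <cstdint>
#include <vector>

// Mirrors the byte order used by emitTwoBytes(), emit(uint32_t) and
// emit(uint64_t) above.
static void writeLE(uint64_t value, unsigned bytes, std::vector<uint8_t> &out) {
  for (unsigned i = 0; i < bytes; i++)
    out.push_back((value >> (8 * i)) & 0xff);
}

int main() {
  std::vector<uint8_t> buf;
  writeLE(0x11223344u, 4, buf);
  // The least significant byte comes first in the byte stream.
  assert(buf[0] == 0x44 && buf[1] == 0x33 && buf[2] == 0x22 && buf[3] == 0x11);
  return 0;
}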
+
+unsigned R600CodeEmitter::getHWReg(unsigned regNo) const
+{
+ unsigned hwReg;
+
+ hwReg = TRI->getHWRegIndex(regNo);
+ if (AMDIL::R600_CReg32RegClass.contains(regNo)) {
+ hwReg += 512;
+ }
+ return hwReg;
+}
+
+uint64_t R600CodeEmitter::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const
+{
+ if (MO.isReg()) {
+ return getHWReg(MO.getReg());
+ } else {
+ return MO.getImm();
+ }
+}
+
+
+RegElement maskBitToElement(unsigned int maskBit)
+{
+ switch (maskBit) {
+ case WRITE_MASK_X: return ELEMENT_X;
+ case WRITE_MASK_Y: return ELEMENT_Y;
+ case WRITE_MASK_Z: return ELEMENT_Z;
+ case WRITE_MASK_W: return ELEMENT_W;
+ default:
+ assert("Invalid maskBit");
+ return ELEMENT_X;
+ }
+}
+
+unsigned int dstSwizzleToWriteMask(unsigned swizzle)
+{
+ switch(swizzle) {
+ default:
+ case AMDIL_DST_SWIZZLE_DEFAULT:
+ return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W;
+ case AMDIL_DST_SWIZZLE_X___:
+ return WRITE_MASK_X;
+ case AMDIL_DST_SWIZZLE_XY__:
+ return WRITE_MASK_X | WRITE_MASK_Y;
+ case AMDIL_DST_SWIZZLE_XYZ_:
+ return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z;
+ case AMDIL_DST_SWIZZLE_XYZW:
+ return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W;
+ case AMDIL_DST_SWIZZLE__Y__:
+ return WRITE_MASK_Y;
+ case AMDIL_DST_SWIZZLE__YZ_:
+ return WRITE_MASK_Y | WRITE_MASK_Z;
+ case AMDIL_DST_SWIZZLE__YZW:
+ return WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W;
+ case AMDIL_DST_SWIZZLE___Z_:
+ return WRITE_MASK_Z;
+ case AMDIL_DST_SWIZZLE___ZW:
+ return WRITE_MASK_Z | WRITE_MASK_W;
+ case AMDIL_DST_SWIZZLE____W:
+ return WRITE_MASK_W;
+ case AMDIL_DST_SWIZZLE_X_ZW:
+ return WRITE_MASK_X | WRITE_MASK_Z | WRITE_MASK_W;
+ case AMDIL_DST_SWIZZLE_XY_W:
+ return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_W;
+ case AMDIL_DST_SWIZZLE_X_Z_:
+ return WRITE_MASK_X | WRITE_MASK_Z;
+ case AMDIL_DST_SWIZZLE_X__W:
+ return WRITE_MASK_X | WRITE_MASK_W;
+ case AMDIL_DST_SWIZZLE__Y_W:
+ return WRITE_MASK_Y | WRITE_MASK_W;
+ }
+}
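
The swizzle only selects which destination channels are written, so it collapses to a 4-bit write mask. A small sketch, assuming the usual one-bit-per-channel values for the WRITE_MASK_* constants (their real values are defined elsewhere in this target):

#include <cstdio>

enum {
  WRITE_MASK_X = 1 << 0,
  WRITE_MASK_Y = 1 << 1,
  WRITE_MASK_Z = 1 << 2,
  WRITE_MASK_W = 1 << 3
};

int main() {
  // AMDIL_DST_SWIZZLE_XY_W writes X, Y and W but leaves Z untouched.
  unsigned mask = WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_W;
  for (unsigned chan = 0; chan < 4; chan++)
    printf("chan %u: %s\n", chan, (mask & (1 << chan)) ? "written" : "masked");
  return 0;
}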
+
+#include "AMDILGenCodeEmitter.inc"
+
diff --git a/lib/Target/AMDIL/R600KernelParameters.cpp b/lib/Target/AMDIL/R600KernelParameters.cpp
new file mode 100644
index 00000000000..3fdf48a2bf2
--- /dev/null
+++ b/lib/Target/AMDIL/R600KernelParameters.cpp
@@ -0,0 +1,503 @@
+//===-- R600KernelParameters.cpp - Lower OpenCL kernel arguments ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass replaces uses of OpenCL kernel arguments with loads from the
+// parameter constant buffer (the PARAM_D/PARAM_I address spaces) and turns
+// image and sampler arguments into resource IDs.
+//
+//===----------------------------------------------------------------------===//
+
+#include <llvm-c/Core.h>
+#include "R600KernelParameters.h"
+#include "R600OpenCLUtils.h"
+#include "llvm/Constants.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/TypeBuilder.h"
+// #include "llvm/CodeGen/Function.h"
+
+namespace AMDILAS {
+enum AddressSpaces {
+ PRIVATE_ADDRESS = 0, // Address space for private memory.
+ GLOBAL_ADDRESS = 1, // Address space for global memory (RAT0, VTX0).
+ CONSTANT_ADDRESS = 2, // Address space for constant memory.
+ LOCAL_ADDRESS = 3, // Address space for local memory.
+ REGION_ADDRESS = 4, // Address space for region memory.
+ ADDRESS_NONE = 5, // Address space for unknown memory.
+ PARAM_D_ADDRESS = 6, // Address space for directly addressable parameter memory (CONST0)
+ PARAM_I_ADDRESS = 7, // Address space for indirectly addressable parameter memory (VTX1)
+ LAST_ADDRESS = 8
+};
+}
+
+
+#include <map>
+#include <set>
+
+using namespace llvm;
+using namespace std;
+
+#define CONSTANT_CACHE_SIZE_DW 127
+
+class R600KernelParameters : public llvm::FunctionPass
+{
+ const llvm::TargetData * TD;
+ LLVMContext* Context;
+ Module *mod;
+
+ struct param
+ {
+ param() : val(NULL), ptr_val(NULL), offset_in_dw(0), size_in_dw(0), indirect(false), specialID(0) {}
+
+ llvm::Value* val;
+ llvm::Value* ptr_val;
+ int offset_in_dw;
+ int size_in_dw;
+
+ bool indirect;
+
+ string specialType;
+ int specialID;
+
+ int end() { return offset_in_dw + size_in_dw; }
+ /* The first 9 dwords are reserved for the grid sizes. */
+ int get_rat_offset() { return 9 + offset_in_dw; }
+ };
+
+ std::vector<param> params;
+
+ int getLastSpecialID(const string& TypeName);
+
+ int getListSize();
+ void AddParam(llvm::Argument* arg);
+ int calculateArgumentSize(llvm::Argument* arg);
+ void RunAna(llvm::Function* fun);
+ void Replace(llvm::Function* fun);
+ bool isIndirect(Value* val, set<Value*>& visited);
+ void Propagate(llvm::Function* fun);
+ void Propagate(llvm::Value* v, const llvm::Twine& name, bool indirect = false);
+ Value* ConstantRead(Function* fun, param& p);
+ Value* handleSpecial(Function* fun, param& p);
+ bool isSpecialType(Type*);
+ string getSpecialTypeName(Type*);
+public:
+ static char ID;
+ R600KernelParameters() : FunctionPass(ID) {};
+ R600KernelParameters(const llvm::TargetData* TD) : FunctionPass(ID), TD(TD) {}
+// bool runOnFunction (llvm::Function &F);
+ bool runOnFunction (llvm::Function &F);
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+ const char *getPassName() const;
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+};
+
+char R600KernelParameters::ID = 0;
+
+static RegisterPass<R600KernelParameters> X("kerparam", "OpenCL Kernel Parameter conversion", false, false);
+
+int R600KernelParameters::getLastSpecialID(const string& TypeName)
+{
+ int lastID = -1;
+
+ for (vector<param>::iterator i = params.begin(); i != params.end(); i++)
+ {
+ if (i->specialType == TypeName)
+ {
+ lastID = i->specialID;
+ }
+ }
+
+ return lastID;
+}
+
+int R600KernelParameters::getListSize()
+{
+ if (params.size() == 0)
+ {
+ return 0;
+ }
+
+ return params.back().end();
+}
+
+bool R600KernelParameters::isIndirect(Value* val, set<Value*>& visited)
+{
+ if (isa<LoadInst>(val))
+ {
+ return false;
+ }
+
+ if (isa<IntegerType>(val->getType()))
+ {
+ assert(0 and "Internal error");
+ return false;
+ }
+
+ if (visited.count(val))
+ {
+ return false;
+ }
+
+ visited.insert(val);
+
+ if (isa<GetElementPtrInst>(val))
+ {
+ GetElementPtrInst* GEP = dyn_cast<GetElementPtrInst>(val);
+ GetElementPtrInst::op_iterator i = GEP->op_begin();
+
+ for (i++; i != GEP->op_end(); i++)
+ {
+ if (!isa<Constant>(*i))
+ {
+ return true;
+ }
+ }
+ }
+
+ for (Value::use_iterator i = val->use_begin(); i != val->use_end(); i++)
+ {
+ Value* v2 = dyn_cast<Value>(*i);
+
+ if (v2)
+ {
+ if (isIndirect(v2, visited))
+ {
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+void R600KernelParameters::AddParam(llvm::Argument* arg)
+{
+ param p;
+
+ p.val = dyn_cast<Value>(arg);
+ p.offset_in_dw = getListSize();
+ p.size_in_dw = calculateArgumentSize(arg);
+
+ if (isa<PointerType>(arg->getType()) and arg->hasByValAttr())
+ {
+ set<Value*> visited;
+ p.indirect = isIndirect(p.val, visited);
+ }
+
+ params.push_back(p);
+}
+
+int R600KernelParameters::calculateArgumentSize(llvm::Argument* arg)
+{
+ Type* t = arg->getType();
+
+ if (arg->hasByValAttr() and dyn_cast<PointerType>(t))
+ {
+ t = dyn_cast<PointerType>(t)->getElementType();
+ }
+
+ int store_size_in_dw = (TD->getTypeStoreSize(t) + 3)/4;
+
+ assert(store_size_in_dw);
+
+ return store_size_in_dw;
+}
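
Arguments are packed back to back in dword units, and get_rat_offset() later adds the 9 reserved dwords. A worked example with a hypothetical kernel signature (the byte sizes are assumed, not taken from TargetData):

#include <cstdio>

int main() {
  // Hypothetical arguments with assumed store sizes in bytes:
  // a float4 (16), an int (4) and a char3 (3).
  unsigned storeSizes[] = {16, 4, 3};
  unsigned offset = 0;                     // offset_in_dw of the next argument
  for (unsigned bytes : storeSizes) {
    unsigned sizeInDw = (bytes + 3) / 4;   // round up to whole dwords
    // get_rat_offset() adds the 9 reserved dwords when the value is read.
    printf("offset_in_dw=%u size_in_dw=%u rat_offset=%u\n",
           offset, sizeInDw, 9 + offset);
    offset += sizeInDw;
  }
  return 0;
}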
+
+
+void R600KernelParameters::RunAna(llvm::Function* fun)
+{
+ assert(isOpenCLKernel(fun));
+
+ for (Function::arg_iterator i = fun->arg_begin(); i != fun->arg_end(); i++)
+ {
+ AddParam(i);
+ }
+
+}
+
+void R600KernelParameters::Replace(llvm::Function* fun)
+{
+ for (std::vector<param>::iterator i = params.begin(); i != params.end(); i++)
+ {
+ Value *new_val;
+
+ if (isSpecialType(i->val->getType()))
+ {
+ new_val = handleSpecial(fun, *i);
+ }
+ else
+ {
+ new_val = ConstantRead(fun, *i);
+ }
+ if (new_val)
+ {
+ i->val->replaceAllUsesWith(new_val);
+ }
+ }
+}
+
+void R600KernelParameters::Propagate(llvm::Function* fun)
+{
+ for (std::vector<param>::iterator i = params.begin(); i != params.end(); i++)
+ {
+ if (i->ptr_val)
+ {
+ Propagate(i->ptr_val, i->val->getName(), i->indirect);
+ }
+ }
+}
+
+void R600KernelParameters::Propagate(Value* v, const Twine& name, bool indirect)
+{
+ LoadInst* load = dyn_cast<LoadInst>(v);
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(v);
+
+ unsigned addrspace;
+
+ if (indirect)
+ {
+ addrspace = AMDILAS::PARAM_I_ADDRESS;
+ }
+ else
+ {
+ addrspace = AMDILAS::PARAM_D_ADDRESS;
+ }
+
+ if (GEP and GEP->getType()->getAddressSpace() != addrspace)
+ {
+ Value* op = GEP->getPointerOperand();
+
+ if (dyn_cast<PointerType>(op->getType())->getAddressSpace() != addrspace)
+ {
+ op = new BitCastInst(op, PointerType::get(dyn_cast<PointerType>(op->getType())->getElementType(), addrspace), name, dyn_cast<Instruction>(v));
+ }
+
+ vector<Value*> params(GEP->idx_begin(), GEP->idx_end());
+
+ GetElementPtrInst* GEP2 = GetElementPtrInst::Create(op, params, name, dyn_cast<Instruction>(v));
+ GEP2->setIsInBounds(GEP->isInBounds());
+ v = dyn_cast<Value>(GEP2);
+ GEP->replaceAllUsesWith(GEP2);
+ GEP->eraseFromParent();
+ load = NULL;
+ }
+
+ if (load)
+ {
+ if (load->getPointerAddressSpace() != addrspace) ///normally at this point we have the right address space
+ {
+ Value *orig_ptr = load->getPointerOperand();
+ PointerType *orig_ptr_type = dyn_cast<PointerType>(orig_ptr->getType());
+
+ Type* new_ptr_type = PointerType::get(orig_ptr_type->getElementType(), addrspace);
+
+ Value* new_ptr = orig_ptr;
+
+ if (orig_ptr->getType() != new_ptr_type)
+ {
+ new_ptr = new BitCastInst(orig_ptr, new_ptr_type, "prop_cast", load);
+ }
+
+ Value* new_load = new LoadInst(new_ptr, name, load);
+ load->replaceAllUsesWith(new_load);
+ load->eraseFromParent();
+ }
+
+ return;
+ }
+
+ vector<User*> users(v->use_begin(), v->use_end());
+
+ for (int i = 0; i < int(users.size()); i++)
+ {
+ Value* v2 = dyn_cast<Value>(users[i]);
+
+ if (v2)
+ {
+ Propagate(v2, name, indirect);
+ }
+ }
+}
+
+Value* R600KernelParameters::ConstantRead(Function* fun, param& p)
+{
+ assert(fun->front().begin() != fun->front().end());
+
+ Instruction *first_inst = fun->front().begin();
+ IRBuilder<> builder(first_inst);
+ /* The first 3 dwords are reserved for the dimension info. */
+
+ if (!p.val->hasNUsesOrMore(1))
+ {
+ return NULL;
+ }
+ unsigned addrspace;
+
+ if (p.indirect)
+ {
+ addrspace = AMDILAS::PARAM_I_ADDRESS;
+ }
+ else
+ {
+ addrspace = AMDILAS::PARAM_D_ADDRESS;
+ }
+
+ Argument *arg = dyn_cast<Argument>(p.val);
+ Type * argType = p.val->getType();
+ PointerType * argPtrType = dyn_cast<PointerType>(p.val->getType());
+
+ if (argPtrType and arg->hasByValAttr())
+ {
+ Value* param_addr_space_ptr = ConstantPointerNull::get(PointerType::get(Type::getInt32Ty(*Context), addrspace));
+ Value* param_ptr = GetElementPtrInst::Create(param_addr_space_ptr, ConstantInt::get(Type::getInt32Ty(*Context), p.get_rat_offset()), arg->getName(), first_inst);
+ param_ptr = new BitCastInst(param_ptr, PointerType::get(argPtrType->getElementType(), addrspace), arg->getName(), first_inst);
+ p.ptr_val = param_ptr;
+ return param_ptr;
+ }
+ else
+ {
+ Value* param_addr_space_ptr = ConstantPointerNull::get(PointerType::get(argType, addrspace));
+
+ Value* param_ptr = builder.CreateGEP(param_addr_space_ptr,
+ ConstantInt::get(Type::getInt32Ty(*Context), p.get_rat_offset()), arg->getName());
+
+ Value* param_value = builder.CreateLoad(param_ptr, arg->getName());
+
+ return param_value;
+ }
+}
+
+Value* R600KernelParameters::handleSpecial(Function* fun, param& p)
+{
+ string name = getSpecialTypeName(p.val->getType());
+ int ID;
+
+ assert(!name.empty());
+
+ if (name == "image2d_t" or name == "image3d_t")
+ {
+ int lastID = max(getLastSpecialID("image2d_t"), getLastSpecialID("image3d_t"));
+
+ if (lastID == -1)
+ {
+ ID = 2; ///ID0 and ID1 are used internally by the driver
+ }
+ else
+ {
+ ID = lastID + 1;
+ }
+ }
+ else if (name == "sampler_t")
+ {
+ int lastID = getLastSpecialID("sampler_t");
+
+ if (lastID == -1)
+ {
+ ID = 0;
+ }
+ else
+ {
+ ID = lastID + 1;
+ }
+ }
+ else
+ {
+ ///TODO: give some error message
+ return NULL;
+ }
+
+ p.specialType = name;
+ p.specialID = ID;
+
+ Instruction *first_inst = fun->front().begin();
+
+ return new IntToPtrInst(ConstantInt::get(Type::getInt32Ty(*Context), p.specialID), p.val->getType(), "resourceID", first_inst);
+}
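
So image arguments (2D and 3D share one counter) receive IDs starting at 2, while samplers start at 0. A compact sketch of that allocation policy:

#include <cstdio>
#include <string>

struct ResourceIds {
  int lastImage = 1;    // first image gets ID 2 (IDs 0 and 1 are reserved)
  int lastSampler = -1; // first sampler gets ID 0

  int assign(const std::string &type) {
    if (type == "image2d_t" || type == "image3d_t")
      return ++lastImage;
    if (type == "sampler_t")
      return ++lastSampler;
    return -1; // not a special type
  }
};

int main() {
  ResourceIds ids;
  int a = ids.assign("image2d_t");  // 2
  int b = ids.assign("sampler_t");  // 0
  int c = ids.assign("image3d_t");  // 3
  printf("%d %d %d\n", a, b, c);
  return 0;
}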
+
+
+bool R600KernelParameters::isSpecialType(Type* t)
+{
+ return !getSpecialTypeName(t).empty();
+}
+
+string R600KernelParameters::getSpecialTypeName(Type* t)
+{
+ PointerType *pt = dyn_cast<PointerType>(t);
+ StructType *st = NULL;
+
+ if (pt)
+ {
+ st = dyn_cast<StructType>(pt->getElementType());
+ }
+
+ if (st)
+ {
+ string prefix = "struct.opencl_builtin_type_";
+
+ string name = st->getName().str();
+
+ if (name.substr(0, prefix.length()) == prefix)
+ {
+ return name.substr(prefix.length(), name.length());
+ }
+ }
+
+ return "";
+}
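
In other words, an OpenCL builtin object shows up as a pointer to a struct named struct.opencl_builtin_type_&lt;name&gt;, and the helper returns &lt;name&gt;. A minimal string-level equivalent:

#include <cassert>
#include <string>

static std::string specialTypeName(const std::string &structName) {
  const std::string prefix = "struct.opencl_builtin_type_";
  if (structName.compare(0, prefix.size(), prefix) == 0)
    return structName.substr(prefix.size());
  return "";
}

int main() {
  assert(specialTypeName("struct.opencl_builtin_type_image2d_t") == "image2d_t");
  assert(specialTypeName("struct.foo").empty());
  return 0;
}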
+
+
+bool R600KernelParameters::runOnFunction (Function &F)
+{
+ if (!isOpenCLKernel(&F))
+ {
+ return false;
+ }
+
+// F.dump();
+
+ RunAna(&F);
+ Replace(&F);
+ Propagate(&F);
+
+ mod->dump();
+ return false;
+}
+
+void R600KernelParameters::getAnalysisUsage(AnalysisUsage &AU) const
+{
+// AU.addRequired<FunctionAnalysis>();
+ FunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+}
+
+const char *R600KernelParameters::getPassName() const
+{
+ return "OpenCL Kernel parameter conversion to memory";
+}
+
+bool R600KernelParameters::doInitialization(Module &M)
+{
+ Context = &M.getContext();
+ mod = &M;
+
+ return false;
+}
+
+bool R600KernelParameters::doFinalization(Module &M)
+{
+ return false;
+}
+
+llvm::FunctionPass* createR600KernelParametersPass(const llvm::TargetData* TD)
+{
+ FunctionPass *p = new R600KernelParameters(TD);
+
+ return p;
+}
+
+
diff --git a/lib/Target/AMDIL/R600KernelParameters.h b/lib/Target/AMDIL/R600KernelParameters.h
new file mode 100644
index 00000000000..904a469a5f0
--- /dev/null
+++ b/lib/Target/AMDIL/R600KernelParameters.h
@@ -0,0 +1,28 @@
+//===-- R600KernelParameters.h - R600 kernel parameter lowering -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Declares the factory function for the R600 kernel parameter lowering pass.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef KERNELPARAMETERS_H
+#define KERNELPARAMETERS_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Value.h"
+
+#include <vector>
+
+llvm::FunctionPass* createR600KernelParametersPass(const llvm::TargetData* TD);
+
+
+#endif
diff --git a/lib/Target/AMDIL/R600LowerInstructions.cpp b/lib/Target/AMDIL/R600LowerInstructions.cpp
new file mode 100644
index 00000000000..fb5431d0eef
--- /dev/null
+++ b/lib/Target/AMDIL/R600LowerInstructions.cpp
@@ -0,0 +1,502 @@
+//===-- R600LowerInstructions.cpp - Lower AMDIL pseudo instructions -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass expands AMDIL pseudo instructions (ABS, CLAMP, SUB, NEGATE,
+// UDIV, global loads and stores, etc.) into sequences of R600 instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPUUtil.h"
+#include "AMDIL.h"
+#include "AMDILRegisterInfo.h"
+#include "R600InstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#include <stdio.h>
+
+using namespace llvm;
+
+namespace {
+ class R600LowerInstructionsPass : public MachineFunctionPass {
+
+ private:
+ static char ID;
+ TargetMachine &TM;
+ const R600InstrInfo * TII;
+ MachineRegisterInfo * MRI;
+
+ void lowerFLT(MachineInstr &MI);
+
+ void calcAddress(const MachineOperand &ptrOp,
+ const MachineOperand &indexOp,
+ unsigned indexReg,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void divMod(MachineInstr &MI,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ bool div = true) const;
+
+ public:
+ R600LowerInstructionsPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TM(tm),
+ TII(static_cast<const R600InstrInfo*>(tm.getInstrInfo())),
+ MRI(NULL)
+ { }
+
+ const char *getPassName() const { return "R600 Lower Instructions"; }
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ };
+} /* End anonymous namespace */
+
+char R600LowerInstructionsPass::ID = 0;
+
+FunctionPass *llvm::createR600LowerInstructionsPass(TargetMachine &tm) {
+ return new R600LowerInstructionsPass(tm);
+}
+
+bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
+{
+ MRI = &MF.getRegInfo();
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+ I != MBB.end(); I = Next, Next = llvm::next(I) ) {
+
+ MachineInstr &MI = *I;
+ switch(MI.getOpcode()) {
+ case AMDIL::FLT:
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::FGE))
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(2))
+ .addOperand(MI.getOperand(1));
+ break;
+
+ case AMDIL::ABS_i32:
+ {
+ unsigned setgt = MRI->createVirtualRegister(
+ &AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGE_INT),
+ setgt)
+ .addOperand(MI.getOperand(1))
+ .addReg(AMDIL::ZERO);
+
+ unsigned add_int = MRI->createVirtualRegister(
+ &AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::ADD_INT),
+ add_int)
+ .addReg(setgt)
+ .addOperand(MI.getOperand(1));
+
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::XOR_INT))
+ .addOperand(MI.getOperand(0))
+ .addReg(setgt)
+ .addReg(add_int);
+
+ break;
+ }
+
+ /* XXX: We could propagate the ABS flag to all of the uses of Operand0 and
+ * remove the ABS instruction.*/
+ case AMDIL::FABS_f32:
+ case AMDIL::ABS_f32:
+ MI.getOperand(1).addTargetFlag(MO_FLAG_ABS);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::MOVE_f32))
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(1));
+ break;
+
+ case AMDIL::BINARY_OR_f32:
+ {
+ unsigned tmp0 = MRI->createVirtualRegister(&AMDIL::GPRI32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::FTOI), tmp0)
+ .addOperand(MI.getOperand(1));
+ unsigned tmp1 = MRI->createVirtualRegister(&AMDIL::GPRI32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::FTOI), tmp1)
+ .addOperand(MI.getOperand(2));
+ unsigned tmp2 = MRI->createVirtualRegister(&AMDIL::GPRI32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::BINARY_OR_i32), tmp2)
+ .addReg(tmp0)
+ .addReg(tmp1);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::ITOF), MI.getOperand(0).getReg())
+ .addReg(tmp2);
+ break;
+ }
+ case AMDIL::CMOVLOG_f32:
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(MI.getOpcode()))
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(1))
+ .addOperand(MI.getOperand(3))
+ .addOperand(MI.getOperand(2));
+ break;
+
+ case AMDIL::CMOVLOG_i32:
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT))
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(1))
+ .addOperand(MI.getOperand(3))
+ .addOperand(MI.getOperand(2));
+ break;
+
+ case AMDIL::CLAMP_f32:
+ {
+ MachineOperand lowOp = MI.getOperand(2);
+ MachineOperand highOp = MI.getOperand(3);
+ if (lowOp.isReg() && highOp.isReg()
+ && lowOp.getReg() == AMDIL::ZERO && highOp.getReg() == AMDIL::ONE) {
+ MI.getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::MOV))
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(1));
+ } else {
+ /* XXX: Handle other cases */
+ abort();
+ }
+ break;
+ }
+
+ case AMDIL::UDIV_i32:
+ divMod(MI, MBB, I);
+ break;
+
+ /* XXX: Figure out the semantics of DIV_INF_f32 and make sure this is OK */
+/* case AMDIL::DIV_INF_f32:
+ {
+ unsigned tmp0 = MRI->createVirtualRegister(&AMDIL::GPRF32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I),
+ TM.getInstrInfo()->get(AMDIL::RECIP_CLAMPED), tmp0)
+ .addOperand(MI.getOperand(2));
+ BuildMI(MBB, I, MBB.findDebugLoc(I),
+ TM.getInstrInfo()->get(AMDIL::MUL_IEEE_f32))
+ .addOperand(MI.getOperand(0))
+ .addReg(tmp0)
+ .addOperand(MI.getOperand(1));
+ break;
+ }
+*/ /* XXX: This is an optimization */
+
+ case AMDIL::GLOBALLOAD_f32:
+ case AMDIL::GLOBALLOAD_i32:
+ {
+ MachineOperand &ptrOperand = MI.getOperand(1);
+ MachineOperand &indexOperand = MI.getOperand(2);
+ unsigned indexReg =
+ MRI->createVirtualRegister(&AMDIL::R600_TReg32_XRegClass);
+
+ /* Calculate the address within the VTX buffer */
+ calcAddress(ptrOperand, indexOperand, indexReg, MBB, I);
+
+ /* Make sure the VTX_READ_eg writes to the X chan */
+ MRI->setRegClass(MI.getOperand(0).getReg(),
+ &AMDIL::R600_TReg32_XRegClass);
+
+ /* Add the VTX_READ_eg instruction */
+ BuildMI(MBB, I, MBB.findDebugLoc(I),
+ TII->get(AMDIL::VTX_READ_eg))
+ .addOperand(MI.getOperand(0))
+ .addReg(indexReg)
+ .addImm(1);
+ break;
+ }
+
+ case AMDIL::GLOBALSTORE_i32:
+ case AMDIL::GLOBALSTORE_f32:
+ {
+ MachineOperand &ptrOperand = MI.getOperand(1);
+ MachineOperand &indexOperand = MI.getOperand(2);
+ unsigned rwReg =
+ MRI->createVirtualRegister(&AMDIL::R600_TReg32_XRegClass);
+ unsigned byteIndexReg =
+ MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ unsigned shiftReg =
+ MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ unsigned indexReg =
+ MRI->createVirtualRegister(&AMDIL::R600_TReg32_XRegClass);
+
+ /* Move the store value to the correct register class */
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::COPY), rwReg)
+ .addOperand(MI.getOperand(0));
+
+ /* Calculate the address in the RAT */
+ calcAddress(ptrOperand, indexOperand, byteIndexReg, MBB, I);
+
+
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::MOV), shiftReg)
+ .addReg(AMDIL::ALU_LITERAL_X)
+ .addImm(2);
+
+ /* XXX: Check GPU family */
+ BuildMI(MBB, I, MBB.findDebugLoc(I),
+ TII->get(AMDIL::LSHR_eg), indexReg)
+ .addReg(byteIndexReg)
+ .addReg(shiftReg);
+
+ /* XXX: Check GPU Family */
+ BuildMI(MBB, I, MBB.findDebugLoc(I),
+ TII->get(AMDIL::RAT_WRITE_CACHELESS_eg))
+ .addReg(rwReg)
+ .addReg(indexReg)
+ .addImm(0);
+ break;
+ }
+ case AMDIL::ILT:
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGT_INT))
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(2))
+ .addOperand(MI.getOperand(1));
+ break;
+ case AMDIL::LOADCONST_f32:
+ case AMDIL::LOADCONST_i32:
+ {
+ bool canInline = false;
+ unsigned inlineReg;
+ MachineOperand & dstOp = MI.getOperand(0);
+ MachineOperand & immOp = MI.getOperand(1);
+ if (immOp.isFPImm()) {
+ const ConstantFP * cfp = immOp.getFPImm();
+ if (cfp->isZero()) {
+ canInline = true;
+ inlineReg = AMDIL::ZERO;
+ } else if (cfp->isExactlyValue(1.0f)) {
+ canInline = true;
+ inlineReg = AMDIL::ONE;
+ } else if (cfp->isExactlyValue(0.5f)) {
+ canInline = true;
+ inlineReg = AMDIL::HALF;
+ }
+ }
+
+ if (canInline) {
+ MachineOperand * use = dstOp.getNextOperandForReg();
+ /* The lowering operation for CLAMP needs to have the immediates
+ * as operands, so we must propagate them. */
+ while (use) {
+ MachineOperand * next = use->getNextOperandForReg();
+ if (use->getParent()->getOpcode() == AMDIL::CLAMP_f32) {
+ use->setReg(inlineReg);
+ }
+ use = next;
+ }
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::COPY))
+ .addOperand(dstOp)
+ .addReg(inlineReg);
+ } else {
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::MOV))
+ .addOperand(dstOp)
+ .addReg(AMDIL::ALU_LITERAL_X)
+ .addOperand(immOp);
+ }
+ break;
+ }
+
+ case AMDIL::MASK_WRITE:
+ {
+ unsigned maskedRegister = MI.getOperand(0).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
+ MachineInstr * defInstr = MRI->getVRegDef(maskedRegister);
+ MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister);
+ def->addTargetFlag(MO_FLAG_MASK);
+ break;
+ }
+
+ case AMDIL::NEGATE_i32:
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT))
+ .addOperand(MI.getOperand(0))
+ .addReg(AMDIL::ZERO)
+ .addOperand(MI.getOperand(1));
+ break;
+
+ case AMDIL::NEG_f32:
+ {
+ MI.getOperand(1).addTargetFlag(MO_FLAG_NEG);
+ BuildMI(MBB, I, MBB.findDebugLoc(I),
+ TII->get(TII->getISAOpcode(AMDIL::MOV)))
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(1));
+ break;
+ }
+
+ case AMDIL::SUB_f32:
+ {
+ MI.getOperand(2).addTargetFlag(MO_FLAG_NEG);
+ BuildMI(MBB, I, MBB.findDebugLoc(I),
+ TII->get(TII->getISAOpcode(AMDIL::ADD_f32)))
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(1))
+ .addOperand(MI.getOperand(2));
+ break;
+ }
+
+ default:
+ continue;
+ }
+ MI.eraseFromParent();
+ }
+ }
+ return false;
+}
+
+void R600LowerInstructionsPass::calcAddress(const MachineOperand &ptrOp,
+ const MachineOperand &indexOp,
+ unsigned indexReg,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const
+{
+ /* Optimize the case where the indexOperand is 0 */
+ if (indexOp.isImm() && indexOp.getImm() == 0) {
+ assert(ptrOp.isReg());
+ BuildMI(MBB, I, MBB.findDebugLoc(I),
+ TII->get(AMDIL::COPY), indexReg)
+ .addOperand(ptrOp);
+ } else {
+ BuildMI(MBB, I, MBB.findDebugLoc(I),
+ TII->get(AMDIL::ADD_INT), indexReg)
+ .addOperand(indexOp)
+ .addOperand(ptrOp);
+ }
+}
+
+/* Mostly copied from tgsi_divmod() in r600_shader.c */
+void R600LowerInstructionsPass::divMod(MachineInstr &MI,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ bool div) const
+{
+ unsigned dst = MI.getOperand(0).getReg();
+ MachineOperand &numerator = MI.getOperand(1);
+ MachineOperand &denominator = MI.getOperand(2);
+ /* rcp = RECIP(denominator) = 2^32 / denominator + e
+ * e is rounding error */
+ unsigned rcp = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getRECIP_UINT()), rcp)
+ .addOperand(denominator);
+
+ /* rcp_lo = lo(rcp * denominator) */
+ unsigned rcp_lo = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULLO_UINT()), rcp_lo)
+ .addReg(rcp)
+ .addOperand(denominator);
+
+ /* rcp_hi = HI (rcp * denominator) */
+ unsigned rcp_hi = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULHI_UINT()), rcp_hi)
+ .addReg(rcp)
+ .addOperand(denominator);
+
+ unsigned neg_rcp_lo = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), neg_rcp_lo)
+ .addReg(AMDIL::ZERO)
+ .addReg(rcp_lo);
+
+ unsigned abs_rcp_lo = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), abs_rcp_lo)
+ .addReg(rcp_hi)
+ .addReg(neg_rcp_lo)
+ .addReg(rcp_lo);
+
+ unsigned e = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULHI_UINT()), e)
+ .addReg(abs_rcp_lo)
+ .addReg(rcp);
+
+ unsigned rcp_plus_e = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::ADD_INT), rcp_plus_e)
+ .addReg(rcp)
+ .addReg(e);
+
+ unsigned rcp_sub_e = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), rcp_sub_e)
+ .addReg(rcp)
+ .addReg(e);
+
+ /* tmp0 = rcp_hi == 0 ? rcp_plus_e : rcp_sub_e */
+ unsigned tmp0 = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), tmp0)
+ .addReg(rcp_hi)
+ .addReg(rcp_plus_e)
+ .addReg(rcp_sub_e);
+
+ unsigned q = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULHI_UINT()), q)
+ .addReg(tmp0)
+ .addOperand(numerator);
+
+ /* num_sub_r = q * denominator */
+ unsigned num_sub_r = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULLO_UINT()),
+ num_sub_r)
+ .addReg(q)
+ .addOperand(denominator);
+
+ unsigned r = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), r)
+ .addOperand(numerator)
+ .addReg(num_sub_r);
+
+ unsigned r_ge_den = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGE_INT), r_ge_den)
+ .addReg(r)
+ .addOperand(denominator);
+
+ unsigned r_ge_zero = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGE_INT), r_ge_zero)
+ .addOperand(numerator)
+ .addReg(num_sub_r);
+
+ unsigned tmp1 = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::AND_INT), tmp1)
+ .addReg(r_ge_den)
+ .addReg(r_ge_zero);
+
+ unsigned val0 = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ unsigned val1 = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ unsigned result = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
+ if (div) {
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::ADD_INT), val0)
+ .addReg(q)
+ .addReg(AMDIL::ONE_INT);
+
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), val1)
+ .addReg(q)
+ .addReg(AMDIL::ONE_INT);
+
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), result)
+ .addReg(tmp1)
+ .addReg(q)
+ .addReg(val0);
+ } else {
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), val0)
+ .addReg(r)
+ .addOperand(denominator);
+
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::ADD_INT), val1)
+ .addReg(r)
+ .addOperand(denominator);
+
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), result)
+ .addReg(tmp1)
+ .addReg(r)
+ .addReg(val0);
+ }
+
+ /* XXX: Do we need to set to MAX_INT if denominator is 0? */
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), dst)
+ .addReg(r_ge_zero)
+ .addReg(val1)
+ .addReg(result);
+}
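
A host-side reference of the same sequence can help when checking the emitted code. This sketch assumes RECIP_UINT behaves like an approximation of 2^32 / den, that the SETGE comparisons here act on unsigned values, and that CNDE_INT selects its second operand when the condition is zero:

#include <cassert>
#include <cstdint>
#include <initializer_list>

static uint32_t mulhi(uint32_t a, uint32_t b) { return (uint64_t(a) * b) >> 32; }
static uint32_t mullo(uint32_t a, uint32_t b) { return uint32_t(uint64_t(a) * b); }

static uint32_t divmodRef(uint32_t num, uint32_t den, bool wantDiv) {
  // Stand-in for RECIP_UINT: an approximation of 2^32 / den.
  uint32_t rcp        = uint32_t(((uint64_t(1) << 32) - 1) / den);
  uint32_t rcp_lo     = mullo(rcp, den);
  uint32_t rcp_hi     = mulhi(rcp, den);
  uint32_t abs_rcp_lo = (rcp_hi == 0) ? 0u - rcp_lo : rcp_lo;  // CNDE_INT
  uint32_t e          = mulhi(abs_rcp_lo, rcp);
  uint32_t rcp_corr   = (rcp_hi == 0) ? rcp + e : rcp - e;     // CNDE_INT
  uint32_t q          = mulhi(rcp_corr, num);
  uint32_t num_sub_r  = mullo(q, den);
  uint32_t r          = num - num_sub_r;
  bool r_ge_den  = r >= den;
  bool r_ge_zero = num >= num_sub_r;   // i.e. the subtraction did not wrap
  uint32_t result, other;
  if (wantDiv) {
    result = (r_ge_den && r_ge_zero) ? q + 1 : q;
    other  = q - 1;
  } else {
    result = (r_ge_den && r_ge_zero) ? r - den : r;
    other  = r + den;
  }
  return r_ge_zero ? result : other;
}

int main() {
  for (uint32_t n : {0u, 1u, 100u, 0xFFFFFFFFu})
    for (uint32_t d : {1u, 3u, 7u, 100u, 65537u}) {
      assert(divmodRef(n, d, true) == n / d);
      assert(divmodRef(n, d, false) == n % d);
    }
  return 0;
}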
diff --git a/lib/Target/AMDIL/R600LowerShaderInstructions.cpp b/lib/Target/AMDIL/R600LowerShaderInstructions.cpp
new file mode 100644
index 00000000000..394ee7006ce
--- /dev/null
+++ b/lib/Target/AMDIL/R600LowerShaderInstructions.cpp
@@ -0,0 +1,143 @@
+//===-- R600LowerShaderInstructions.cpp - Lower R600 shader pseudo-ops ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers graphics-shader pseudo instructions such as LOAD_INPUT
+// and STORE_OUTPUT to copies from and to physical registers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPULowerShaderInstructions.h"
+#include "AMDIL.h"
+#include "AMDILInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+ class R600LowerShaderInstructionsPass : public MachineFunctionPass,
+ public AMDGPULowerShaderInstructionsPass {
+
+ private:
+ static char ID;
+ TargetMachine &TM;
+
+ void lowerEXPORT_REG_FAKE(MachineInstr &MI, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I);
+ void lowerLOAD_INPUT(MachineInstr & MI);
+ bool lowerSTORE_OUTPUT(MachineInstr & MI, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I);
+
+ public:
+ R600LowerShaderInstructionsPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TM(tm) { }
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "R600 Lower Shader Instructions"; }
+ };
+} /* End anonymous namespace */
+
+char R600LowerShaderInstructionsPass::ID = 0;
+
+FunctionPass *llvm::createR600LowerShaderInstructionsPass(TargetMachine &tm) {
+ return new R600LowerShaderInstructionsPass(tm);
+}
+
+#define INSTR_CASE_FLOAT_V(inst) \
+ case AMDIL:: inst##_v4f32: \
+
+#define INSTR_CASE_FLOAT_S(inst) \
+ case AMDIL:: inst##_f32:
+
+#define INSTR_CASE_FLOAT(inst) \
+ INSTR_CASE_FLOAT_V(inst) \
+ INSTR_CASE_FLOAT_S(inst)
+bool R600LowerShaderInstructionsPass::runOnMachineFunction(MachineFunction &MF)
+{
+ MRI = &MF.getRegInfo();
+
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
+ MachineInstr &MI = *I;
+ bool deleteInstr = false;
+ switch (MI.getOpcode()) {
+
+ default: break;
+
+ case AMDIL::RESERVE_REG:
+ case AMDIL::EXPORT_REG:
+ deleteInstr = true;
+ break;
+
+ case AMDIL::LOAD_INPUT:
+ lowerLOAD_INPUT(MI);
+ deleteInstr = true;
+ break;
+
+ case AMDIL::STORE_OUTPUT:
+ deleteInstr = lowerSTORE_OUTPUT(MI, MBB, I);
+ break;
+
+ }
+
+ ++I;
+
+ if (deleteInstr) {
+ MI.eraseFromParent();
+ }
+ }
+ }
+
+ return false;
+}
+
+/* The goal of this function is to replace the virtual destination register of
+ * a LOAD_INPUT instruction with the physical register that holds the
+ * corresponding input value.
+ *
+ * XXX: I don't think this is the right way to assign physical registers,
+ * but I'm not sure of another way to do this.
+ */
+void R600LowerShaderInstructionsPass::lowerLOAD_INPUT(MachineInstr &MI)
+{
+ MachineOperand &dst = MI.getOperand(0);
+ MachineOperand &arg = MI.getOperand(1);
+ int64_t inputIndex = arg.getImm();
+ const TargetRegisterClass * inputClass = TM.getRegisterInfo()->getRegClass(AMDIL::R600_TReg32RegClassID);
+ unsigned newRegister = inputClass->getRegister(inputIndex);
+ unsigned dstReg = dst.getReg();
+
+ preloadRegister(MI.getParent()->getParent(), TM.getInstrInfo(), newRegister,
+ dstReg);
+}
+
+bool R600LowerShaderInstructionsPass::lowerSTORE_OUTPUT(MachineInstr &MI,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I)
+{
+ MachineOperand &valueOp = MI.getOperand(1);
+ MachineOperand &indexOp = MI.getOperand(2);
+ unsigned valueReg = valueOp.getReg();
+ int64_t outputIndex = indexOp.getImm();
+ const TargetRegisterClass * outputClass = TM.getRegisterInfo()->getRegClass(AMDIL::R600_TReg32RegClassID);
+ unsigned newRegister = outputClass->getRegister(outputIndex);
+
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::COPY),
+ newRegister)
+ .addReg(valueReg);
+
+ if (!MRI->isLiveOut(newRegister))
+ MRI->addLiveOut(newRegister);
+
+ return true;
+
+}
diff --git a/lib/Target/AMDIL/R600OpenCLUtils.h b/lib/Target/AMDIL/R600OpenCLUtils.h
new file mode 100644
index 00000000000..91e41d63d0d
--- /dev/null
+++ b/lib/Target/AMDIL/R600OpenCLUtils.h
@@ -0,0 +1,49 @@
+//===-- R600OpenCLUtils.h - OpenCL kernel helpers --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Helper for identifying OpenCL kernel functions via the opencl.kernels
+// module metadata.
+//
+//===----------------------------------------------------------------------===//
+#ifndef OPENCLUTILS_H
+#define OPENCLUTILS_H
+
+#include "llvm/Function.h"
+
+#include <llvm/Module.h>
+
+static bool isOpenCLKernel(const llvm::Function* fun)
+{
+ llvm::Module *mod = const_cast<llvm::Function*>(fun)->getParent();
+ llvm::NamedMDNode * md = mod->getOrInsertNamedMetadata("opencl.kernels");
+
+ if (!md or !md->getNumOperands())
+ {
+ return false;
+ }
+
+ for (int i = 0; i < int(md->getNumOperands()); i++)
+ {
+ if (!md->getOperand(i) or !md->getOperand(i)->getOperand(0))
+ {
+ continue;
+ }
+
+ assert(md->getOperand(i)->getNumOperands() == 1);
+
+ if (md->getOperand(i)->getOperand(0)->getName() == fun->getName())
+ {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+#endif
diff --git a/lib/Target/AMDIL/SIAssignInterpRegs.cpp b/lib/Target/AMDIL/SIAssignInterpRegs.cpp
new file mode 100644
index 00000000000..6fe29c6a3fe
--- /dev/null
+++ b/lib/Target/AMDIL/SIAssignInterpRegs.cpp
@@ -0,0 +1,110 @@
+//===-- SIAssignInterpRegs.cpp - Assign interpolation input registers -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass assigns the pixel-shader interpolation inputs that are actually
+// used to the first VGPRs and records them in SPI_PS_INPUT_ADDR.
+//
+//===----------------------------------------------------------------------===//
+
+
+
+#include "AMDGPU.h"
+#include "AMDGPUUtil.h"
+#include "AMDIL.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+ class SIAssignInterpRegsPass : public MachineFunctionPass {
+
+ private:
+ static char ID;
+ TargetMachine &TM;
+
+ public:
+ SIAssignInterpRegsPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TM(tm) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "SI Assign intrpolation registers"; }
+ };
+} // End anonymous namespace
+
+char SIAssignInterpRegsPass::ID = 0;
+
+#define INTERP_VALUES 16
+
+struct interp_info {
+ bool enabled;
+ unsigned regs[3];
+ unsigned reg_count;
+};
+
+
+FunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm) {
+ return new SIAssignInterpRegsPass(tm);
+}
+
+bool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF)
+{
+
+ struct interp_info InterpUse[INTERP_VALUES] = {
+ {false, {AMDIL::PERSP_SAMPLE_I, AMDIL::PERSP_SAMPLE_J}, 2},
+ {false, {AMDIL::PERSP_CENTER_I, AMDIL::PERSP_CENTER_J}, 2},
+ {false, {AMDIL::PERSP_CENTROID_I, AMDIL::PERSP_CENTROID_J}, 2},
+ {false, {AMDIL::PERSP_I_W, AMDIL::PERSP_J_W, AMDIL::PERSP_1_W}, 3},
+ {false, {AMDIL::LINEAR_SAMPLE_I, AMDIL::LINEAR_SAMPLE_J}, 2},
+ {false, {AMDIL::LINEAR_CENTER_I, AMDIL::LINEAR_CENTER_J}, 2},
+ {false, {AMDIL::LINEAR_CENTROID_I, AMDIL::LINEAR_CENTROID_J}, 2},
+ {false, {AMDIL::LINE_STIPPLE_TEX_COORD}, 1},
+ {false, {AMDIL::POS_X_FLOAT}, 1},
+ {false, {AMDIL::POS_Y_FLOAT}, 1},
+ {false, {AMDIL::POS_Z_FLOAT}, 1},
+ {false, {AMDIL::POS_W_FLOAT}, 1},
+ {false, {AMDIL::FRONT_FACE}, 1},
+ {false, {AMDIL::ANCILLARY}, 1},
+ {false, {AMDIL::SAMPLE_COVERAGE}, 1},
+ {false, {AMDIL::POS_FIXED_PT}, 1}
+ };
+
+ SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ /* First pass, mark the interpolation values that are used. */
+ for (unsigned interp_idx = 0; interp_idx < INTERP_VALUES; interp_idx++) {
+ for (unsigned reg_idx = 0; reg_idx < InterpUse[interp_idx].reg_count;
+ reg_idx++) {
+ InterpUse[interp_idx].enabled = InterpUse[interp_idx].enabled ||
+ !MRI.use_empty(InterpUse[interp_idx].regs[reg_idx]);
+ }
+ }
+
+ unsigned used_vgprs = 0;
+
+ /* Second pass, replace with VGPRs. */
+ for (unsigned interp_idx = 0; interp_idx < INTERP_VALUES; interp_idx++) {
+ if (!InterpUse[interp_idx].enabled) {
+ continue;
+ }
+ MFI->spi_ps_input_addr |= (1 << interp_idx);
+
+ for (unsigned reg_idx = 0; reg_idx < InterpUse[interp_idx].reg_count;
+ reg_idx++, used_vgprs++) {
+ unsigned new_reg = AMDIL::VReg_32RegClass.getRegister(used_vgprs);
+ unsigned virt_reg = MRI.createVirtualRegister(&AMDIL::VReg_32RegClass);
+ MRI.replaceRegWith(InterpUse[interp_idx].regs[reg_idx], virt_reg);
+ AMDGPU::utilAddLiveIn(&MF, MRI, TM.getInstrInfo(), new_reg, virt_reg);
+ }
+ }
+
+ return false;
+}
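
Each enabled interpolation value sets one bit of SPI_PS_INPUT_ADDR and its registers are packed into the lowest VGPRs in declaration order. A standalone sketch of that bookkeeping (the shader in the example is hypothetical):

#include <cstdio>

struct InterpValue {
  bool used;
  unsigned regCount;
};

int main() {
  // Hypothetical pixel shader: PERSP_CENTER (index 1, 2 regs) and
  // POS_X_FLOAT (index 8, 1 reg) are the only inputs used.
  InterpValue values[16] = {};
  values[1] = {true, 2};
  values[8] = {true, 1};

  unsigned spiPsInputAddr = 0;
  unsigned nextVgpr = 0;
  for (unsigned i = 0; i < 16; i++) {
    if (!values[i].used)
      continue;
    spiPsInputAddr |= 1u << i;
    for (unsigned r = 0; r < values[i].regCount; r++)
      printf("interp value %u, reg %u -> VGPR%u\n", i, r, nextVgpr++);
  }
  printf("SPI_PS_INPUT_ADDR = 0x%x, VGPRs used = %u\n", spiPsInputAddr, nextVgpr);
  return 0;
}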
diff --git a/lib/Target/AMDIL/SICodeEmitter.cpp b/lib/Target/AMDIL/SICodeEmitter.cpp
new file mode 100644
index 00000000000..ad494fae7c6
--- /dev/null
+++ b/lib/Target/AMDIL/SICodeEmitter.cpp
@@ -0,0 +1,274 @@
+//===-- SICodeEmitter.cpp - SI machine code emitter -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass emits the program state header (register counts and
+// SPI_PS_INPUT_ADDR) followed by the binary encoding of each instruction.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "AMDGPU.h"
+#include "AMDGPUUtil.h"
+#include "AMDILCodeEmitter.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <stdio.h>
+
+#define LITERAL_REG 255
+#define VGPR_BIT(src_idx) (1ULL << (9 * (src_idx) - 1))
+using namespace llvm;
+
+namespace {
+
+ class SICodeEmitter : public MachineFunctionPass, public AMDILCodeEmitter {
+
+ private:
+ static char ID;
+ formatted_raw_ostream &_OS;
+ const TargetMachine *TM;
+ void emitState(MachineFunction & MF);
+ void emitInstr(MachineInstr &MI);
+
+ void outputBytes(uint64_t value, unsigned bytes);
+ unsigned GPRAlign(const MachineInstr &MI, unsigned OpNo, unsigned shift)
+ const;
+
+ public:
+ SICodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
+ _OS(OS), TM(NULL) { }
+ const char *getPassName() const { return "SI Code Emitter"; }
+ bool runOnMachineFunction(MachineFunction &MF);
+ virtual uint64_t getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+ virtual unsigned GPR4AlignEncode(const MachineInstr &MI, unsigned OpNo)
+ const;
+ virtual unsigned GPR2AlignEncode(const MachineInstr &MI, unsigned OpNo)
+ const;
+ virtual uint64_t i32LiteralEncode(const MachineInstr &MI, unsigned OpNo)
+ const;
+ virtual uint64_t VOPPostEncode(const MachineInstr &MI,
+ uint64_t Value) const;
+ };
+}
+
+char SICodeEmitter::ID = 0;
+
+FunctionPass *llvm::createSICodeEmitterPass(formatted_raw_ostream &OS) {
+ return new SICodeEmitter(OS);
+}
+
+void SICodeEmitter::emitState(MachineFunction & MF)
+{
+ unsigned maxSGPR = 0;
+ unsigned maxVGPR = 0;
+ bool VCCUsed = false;
+ const SIRegisterInfo * RI =
+ static_cast<const SIRegisterInfo*>(TM->getRegisterInfo());
+ SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+ unsigned numOperands = MI.getNumOperands();
+ for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
+ MachineOperand & MO = MI.getOperand(op_idx);
+ unsigned maxUsed;
+ unsigned width = 0;
+ bool isSGPR = false;
+ unsigned reg;
+ unsigned hwReg;
+ if (!MO.isReg()) {
+ continue;
+ }
+ reg = MO.getReg();
+ if (reg == AMDIL::VCC) {
+ VCCUsed = true;
+ continue;
+ }
+ if (AMDIL::SReg_32RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 1;
+ } else if (AMDIL::VReg_32RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 1;
+ } else if (AMDIL::SReg_64RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 2;
+ } else if (AMDIL::VReg_64RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 2;
+ } else if (AMDIL::SReg_128RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 4;
+ } else if (AMDIL::VReg_128RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 4;
+ } else if (AMDIL::SReg_256RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 8;
+ } else {
+ assert("!Unknown register class");
+ }
+ hwReg = RI->getHWRegNum(reg);
+ maxUsed = ((hwReg + 1) * width) - 1;
+ if (isSGPR) {
+ maxSGPR = maxUsed > maxSGPR ? maxUsed : maxSGPR;
+ } else {
+ maxVGPR = maxUsed > maxVGPR ? maxUsed : maxVGPR;
+ }
+ }
+ }
+ }
+ if (VCCUsed) {
+ maxSGPR += 2;
+ }
+ outputBytes(maxSGPR + 1, 4);
+ outputBytes(maxVGPR + 1, 4);
+ outputBytes(MFI->spi_ps_input_addr, 4);
+}
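
The emitted state header is therefore three little-endian dwords: SGPR count, VGPR count and SPI_PS_INPUT_ADDR. A small sketch of the register-count bookkeeping, using made-up operands:

#include <cstdio>

int main() {
  struct Use { unsigned hwReg, width; bool isSGPR; };
  Use uses[] = {
    {3, 1, true},   // a 32-bit SGPR with hardware index 3
    {1, 2, true},   // a 64-bit SGPR pair with hardware index 1
    {5, 4, false},  // a 128-bit VGPR quad with hardware index 5
  };
  unsigned maxSGPR = 0, maxVGPR = 0;
  bool vccUsed = true;                      // VCC reserves two extra SGPRs
  for (const Use &u : uses) {
    // Same formula as emitState(): highest index touched by the operand.
    unsigned maxUsed = (u.hwReg + 1) * u.width - 1;
    unsigned &maxReg = u.isSGPR ? maxSGPR : maxVGPR;
    if (maxUsed > maxReg)
      maxReg = maxUsed;
  }
  if (vccUsed)
    maxSGPR += 2;
  // emitState() then writes maxSGPR + 1, maxVGPR + 1 and SPI_PS_INPUT_ADDR,
  // each as a 4-byte little-endian value.
  printf("SGPR count %u, VGPR count %u\n", maxSGPR + 1, maxVGPR + 1);
  return 0;
}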
+
+bool SICodeEmitter::runOnMachineFunction(MachineFunction &MF)
+{
+ MF.dump();
+ TM = &MF.getTarget();
+ const AMDGPUInstrInfo * TII =
+ static_cast<const AMDGPUInstrInfo*>(TM->getInstrInfo());
+
+ emitState(MF);
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+ if (!TII->isRegPreload(MI) && MI.getOpcode() != AMDIL::KILL
+ && MI.getOpcode() != AMDIL::RETURN) {
+ emitInstr(MI);
+ }
+ }
+ }
+ return false;
+}
+
+void SICodeEmitter::emitInstr(MachineInstr &MI)
+{
+ const SIInstrInfo * SII = static_cast<const SIInstrInfo*>(TM->getInstrInfo());
+
+ uint64_t hwInst = getBinaryCodeForInstr(MI);
+
+ if ((hwInst & 0xffffffff) == 0xffffffff) {
+ fprintf(stderr, "Unsupported Instruction: \n");
+ MI.dump();
+ abort();
+ }
+
+// hwInst |= SII->getBinaryCode(MI);
+
+ unsigned bytes = SII->getEncodingBytes(MI);
+ outputBytes(hwInst, bytes);
+}
+
+uint64_t SICodeEmitter::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const
+{
+ const SIRegisterInfo * RI =
+ static_cast<const SIRegisterInfo*>(TM->getRegisterInfo());
+
+ switch(MO.getType()) {
+ case MachineOperand::MO_Register:
+ return RI->getBinaryCode(MO.getReg());
+
+ case MachineOperand::MO_Immediate:
+ return MO.getImm();
+
+ case MachineOperand::MO_FPImmediate:
+ /* XXX: Not all instructions can use inline literals */
+ /* XXX: We should make sure this is a 32-bit constant */
+ return LITERAL_REG | (MO.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue() << 32);
+ default:
+ llvm_unreachable("Encoding of this operand type is not supported yet.");
+ break;
+ }
+}
+
+unsigned SICodeEmitter::GPRAlign(const MachineInstr &MI, unsigned OpNo,
+ unsigned shift) const
+{
+ const SIRegisterInfo * RI =
+ static_cast<const SIRegisterInfo*>(TM->getRegisterInfo());
+ unsigned regCode = RI->getHWRegNum(MI.getOperand(OpNo).getReg());
+ return regCode >> shift;
+}
+
+unsigned SICodeEmitter::GPR4AlignEncode(const MachineInstr &MI,
+ unsigned OpNo) const
+{
+ return GPRAlign(MI, OpNo, 2);
+}
+
+unsigned SICodeEmitter::GPR2AlignEncode(const MachineInstr &MI,
+ unsigned OpNo) const
+{
+ return GPRAlign(MI, OpNo, 1);
+}
+
+uint64_t SICodeEmitter::i32LiteralEncode(const MachineInstr &MI,
+ unsigned OpNo) const
+{
+ return LITERAL_REG | (MI.getOperand(OpNo).getImm() << 32);
+}
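
The literal marker is simply source select 255 with the 32-bit literal carried in the upper half of the 64-bit encoding value, as in the FP-immediate case of getMachineOpValue() above. A minimal pack/unpack check:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t LITERAL_REG = 255;
  uint32_t literal = 0x3F800000;            // 1.0f as raw bits
  uint64_t encoded = LITERAL_REG | (uint64_t(literal) << 32);
  assert((encoded & 0xff) == LITERAL_REG);  // low bits select the literal slot
  assert(uint32_t(encoded >> 32) == literal);
  return 0;
}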
+
+/* Set the "VGPR" bit for VOP args that can take either a VGPR or a SGPR.
+ * XXX: It would be nice if we could handle this without a PostEncode function.
+ */
+uint64_t SICodeEmitter::VOPPostEncode(const MachineInstr &MI,
+ uint64_t Value) const
+{
+ const SIInstrInfo * SII = static_cast<const SIInstrInfo*>(TM->getInstrInfo());
+ unsigned encodingType = SII->getEncodingType(MI);
+ unsigned numSrcOps;
+ unsigned vgprBitOffset;
+
+ if (encodingType == SIInstrEncodingType::VOP3) {
+ numSrcOps = 3;
+ vgprBitOffset = 32;
+ } else {
+ numSrcOps = 1;
+ vgprBitOffset = 0;
+ }
+
+ /* Add one to skip over the destination reg operand. */
+ for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) {
+ if (!MI.getOperand(opIdx).isReg()) {
+ continue;
+ }
+ unsigned reg = MI.getOperand(opIdx).getReg();
+ if (AMDIL::VReg_32RegClass.contains(reg)
+ || AMDIL::VReg_64RegClass.contains(reg)) {
+ Value |= (VGPR_BIT(opIdx)) << vgprBitOffset;
+ }
+ }
+ return Value;
+}
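
VGPR_BIT(src_idx) lands on bits 8, 17 and 26, i.e. the top bit of each 9-bit source field (shifted up by 32 for the VOP3 layout). A quick sketch that prints those positions:

#include <cstdint>
#include <cstdio>

int main() {
  for (unsigned srcIdx = 1; srcIdx <= 3; srcIdx++) {
    uint64_t mask = 1ULL << (9 * srcIdx - 1);   // VGPR_BIT(srcIdx)
    printf("src%u: VGPR flag at bit %u, VOP3 at bit %u (mask 0x%llx)\n",
           srcIdx - 1, 9 * srcIdx - 1, 9 * srcIdx - 1 + 32,
           (unsigned long long)mask);
  }
  return 0;
}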
+
+
+void SICodeEmitter::outputBytes(uint64_t value, unsigned bytes)
+{
+ for (unsigned i = 0; i < bytes; i++) {
+ _OS.write((uint8_t) ((value >> (8 * i)) & 0xff));
+ }
+}
diff --git a/lib/Target/AMDIL/SILowerShaderInstructions.cpp b/lib/Target/AMDIL/SILowerShaderInstructions.cpp
new file mode 100644
index 00000000000..5d49d88dc7c
--- /dev/null
+++ b/lib/Target/AMDIL/SILowerShaderInstructions.cpp
@@ -0,0 +1,90 @@
+//===-- SILowerShaderInstructions.cpp - Lower SI shader pseudo-ops --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers RETURN to S_ENDPGM and SET_M0 to a move into M0.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "AMDGPU.h"
+#include "AMDGPULowerShaderInstructions.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+ class SILowerShaderInstructionsPass : public MachineFunctionPass,
+ public AMDGPULowerShaderInstructionsPass {
+
+ private:
+ static char ID;
+ TargetMachine &TM;
+
+ public:
+ SILowerShaderInstructionsPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TM(tm) { }
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "SI Lower Shader Instructions"; }
+
+ void lowerRETURN(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
+ void lowerSET_M0(MachineInstr &MI, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I);
+ };
+} /* End anonymous namespace */
+
+char SILowerShaderInstructionsPass::ID = 0;
+
+FunctionPass *llvm::createSILowerShaderInstructionsPass(TargetMachine &tm) {
+ return new SILowerShaderInstructionsPass(tm);
+}
+
+bool SILowerShaderInstructionsPass::runOnMachineFunction(MachineFunction &MF)
+{
+ MRI = &MF.getRegInfo();
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+ I != MBB.end(); I = Next, Next = llvm::next(I) ) {
+ MachineInstr &MI = *I;
+ switch (MI.getOpcode()) {
+ case AMDIL::RETURN:
+ lowerRETURN(MBB, I);
+ break;
+ case AMDIL::SET_M0:
+ lowerSET_M0(MI, MBB, I);
+ break;
+ default: continue;
+ }
+ MI.removeFromParent();
+ }
+ }
+
+ return false;
+}
+
+void SILowerShaderInstructionsPass::lowerRETURN(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I)
+{
+ const struct TargetInstrInfo * TII = TM.getInstrInfo();
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::S_ENDPGM));
+}
+
+void SILowerShaderInstructionsPass::lowerSET_M0(MachineInstr &MI,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I)
+{
+ const struct TargetInstrInfo * TII = TM.getInstrInfo();
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::S_MOV_IMM_I32))
+ .addReg(AMDIL::M0)
+ .addOperand(MI.getOperand(1));
+}
diff --git a/lib/Target/AMDIL/SIPropagateImmReads.cpp b/lib/Target/AMDIL/SIPropagateImmReads.cpp
new file mode 100644
index 00000000000..4f925d5de1c
--- /dev/null
+++ b/lib/Target/AMDIL/SIPropagateImmReads.cpp
@@ -0,0 +1,70 @@
+//===-- SIPropagateImmReads.cpp - Lower LOADCONST to immediate moves ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass replaces LOADCONST pseudo instructions with V_MOV_IMM so that
+// immediates are moved directly into their destination registers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUUtil.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+using namespace llvm;
+
+namespace {
+ class SIPropagateImmReadsPass : public MachineFunctionPass {
+
+ private:
+ static char ID;
+ TargetMachine &TM;
+
+ public:
+ SIPropagateImmReadsPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TM(tm) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ };
+} /* End anonymous namespace */
+
+char SIPropagateImmReadsPass::ID = 0;
+
+FunctionPass *llvm::createSIPropagateImmReadsPass(TargetMachine &tm) {
+ return new SIPropagateImmReadsPass(tm);
+}
+
+bool SIPropagateImmReadsPass::runOnMachineFunction(MachineFunction &MF)
+{
+ const SIInstrInfo * TII = static_cast<const SIInstrInfo*>(TM.getInstrInfo());
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+ I != MBB.end(); I = Next, Next = llvm::next(I)) {
+ MachineInstr &MI = *I;
+
+ switch (MI.getOpcode()) {
+ case AMDIL::LOADCONST_f32:
+ case AMDIL::LOADCONST_i32:
+ break;
+ default:
+ continue;
+ }
+
+ /* XXX: Create and use S_MOV_IMM for SREGs */
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::V_MOV_IMM))
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(1));
+
+ MI.eraseFromParent();
+ }
+ }
+ return false;
+}