From 6503c05aafd84388cacbf9fef76e7a0f4207c439 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 25 Apr 2012 13:49:39 -0400 Subject: AMDIL: Add Function passes for R600/SI codegen --- lib/Target/AMDIL/AMDGPUConvertToISA.cpp | 65 + lib/Target/AMDIL/AMDGPULowerInstructions.cpp | 82 + lib/Target/AMDIL/AMDGPULowerShaderInstructions.cpp | 38 + lib/Target/AMDIL/AMDGPULowerShaderInstructions.h | 40 + .../AMDIL/AMDGPUReorderPreloadInstructions.cpp | 66 + lib/Target/AMDIL/AMDILAlgorithms.tpp | 93 + lib/Target/AMDIL/AMDILCFGStructurizer.cpp | 3250 ++++++++++++++++++++ lib/Target/AMDIL/AMDILCodeEmitter.h | 46 + lib/Target/AMDIL/AMDILMCCodeEmitter.cpp | 158 + lib/Target/AMDIL/AMDILMachinePeephole.cpp | 173 ++ lib/Target/AMDIL/AMDILPeepholeOptimizer.cpp | 1138 +++++++ lib/Target/AMDIL/R600CodeEmitter.cpp | 749 +++++ lib/Target/AMDIL/R600KernelParameters.cpp | 503 +++ lib/Target/AMDIL/R600KernelParameters.h | 28 + lib/Target/AMDIL/R600LowerInstructions.cpp | 502 +++ lib/Target/AMDIL/R600LowerShaderInstructions.cpp | 143 + lib/Target/AMDIL/R600OpenCLUtils.h | 49 + lib/Target/AMDIL/SIAssignInterpRegs.cpp | 110 + lib/Target/AMDIL/SICodeEmitter.cpp | 274 ++ lib/Target/AMDIL/SILowerShaderInstructions.cpp | 90 + lib/Target/AMDIL/SIPropagateImmReads.cpp | 70 + 21 files changed, 7667 insertions(+) create mode 100644 lib/Target/AMDIL/AMDGPUConvertToISA.cpp create mode 100644 lib/Target/AMDIL/AMDGPULowerInstructions.cpp create mode 100644 lib/Target/AMDIL/AMDGPULowerShaderInstructions.cpp create mode 100644 lib/Target/AMDIL/AMDGPULowerShaderInstructions.h create mode 100644 lib/Target/AMDIL/AMDGPUReorderPreloadInstructions.cpp create mode 100644 lib/Target/AMDIL/AMDILAlgorithms.tpp create mode 100644 lib/Target/AMDIL/AMDILCFGStructurizer.cpp create mode 100644 lib/Target/AMDIL/AMDILCodeEmitter.h create mode 100644 lib/Target/AMDIL/AMDILMCCodeEmitter.cpp create mode 100644 lib/Target/AMDIL/AMDILMachinePeephole.cpp create mode 100644 lib/Target/AMDIL/AMDILPeepholeOptimizer.cpp create mode 100644 lib/Target/AMDIL/R600CodeEmitter.cpp create mode 100644 lib/Target/AMDIL/R600KernelParameters.cpp create mode 100644 lib/Target/AMDIL/R600KernelParameters.h create mode 100644 lib/Target/AMDIL/R600LowerInstructions.cpp create mode 100644 lib/Target/AMDIL/R600LowerShaderInstructions.cpp create mode 100644 lib/Target/AMDIL/R600OpenCLUtils.h create mode 100644 lib/Target/AMDIL/SIAssignInterpRegs.cpp create mode 100644 lib/Target/AMDIL/SICodeEmitter.cpp create mode 100644 lib/Target/AMDIL/SILowerShaderInstructions.cpp create mode 100644 lib/Target/AMDIL/SIPropagateImmReads.cpp diff --git a/lib/Target/AMDIL/AMDGPUConvertToISA.cpp b/lib/Target/AMDIL/AMDGPUConvertToISA.cpp new file mode 100644 index 00000000000..ce947f8ff78 --- /dev/null +++ b/lib/Target/AMDIL/AMDGPUConvertToISA.cpp @@ -0,0 +1,65 @@ +//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass lowers AMDIL machine instructions to the appropriate hardware +// instructions. 
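+//
+// Each machine instruction is handed to AMDGPUInstrInfo::convertToISA();
+// when that hook returns a replacement, the new instruction is inserted in
+// place and the original AMDIL instruction is erased. Schematically (opcode
+// names here are illustrative only):
+//
+//   %dst = AMDIL::GENERIC_OP %src   -->   %dst = HW_OP %src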
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+using namespace llvm;
+
+namespace {
+  class AMDGPUConvertToISAPass : public MachineFunctionPass {
+
+  private:
+    static char ID;
+    TargetMachine &TM;
+
+    void lowerFLT(MachineInstr &MI);
+
+  public:
+    AMDGPUConvertToISAPass(TargetMachine &tm) :
+      MachineFunctionPass(ID), TM(tm) { }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  };
+} /* End anonymous namespace */
+
+char AMDGPUConvertToISAPass::ID = 0;
+
+FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
+  return new AMDGPUConvertToISAPass(tm);
+}
+
+bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF)
+{
+  const AMDGPUInstrInfo *TII =
+      static_cast<const AMDGPUInstrInfo *>(TM.getInstrInfo());
+
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+       BB != BB_E; ++BB) {
+    MachineBasicBlock &MBB = *BB;
+    for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+         I != MBB.end(); I = Next, Next = llvm::next(I)) {
+      MachineInstr &MI = *I;
+      MachineInstr *newInstr = TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
+      if (!newInstr) {
+        continue;
+      }
+      MBB.insert(I, newInstr);
+      MI.eraseFromParent();
+    }
+  }
+  return false;
+}
diff --git a/lib/Target/AMDIL/AMDGPULowerInstructions.cpp b/lib/Target/AMDIL/AMDGPULowerInstructions.cpp
new file mode 100644
index 00000000000..b49d0dddf65
--- /dev/null
+++ b/lib/Target/AMDIL/AMDGPULowerInstructions.cpp
@@ -0,0 +1,82 @@
+//===-- AMDGPULowerInstructions.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
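+//
+// The one lowering implemented so far expands VCREATE_v4f32, roughly as
+// (see lowerVCREATE_v4f32 below):
+//
+//   %dst = VCREATE_v4f32 %src
+//     =>
+//   %tmp = IMPLICIT_DEF
+//   %dst = INSERT_SUBREG %tmp, %src, sel_x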
+// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + + +#include "AMDGPU.h" +#include "AMDGPURegisterInfo.h" +#include "AMDIL.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +namespace { + class AMDGPULowerInstructionsPass : public MachineFunctionPass { + + private: + static char ID; + TargetMachine &TM; + void lowerVCREATE_v4f32(MachineInstr &MI, MachineBasicBlock::iterator I, + MachineBasicBlock &MBB, MachineFunction &MF); + + public: + AMDGPULowerInstructionsPass(TargetMachine &tm) : + MachineFunctionPass(ID), TM(tm) { } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + }; +} /* End anonymous namespace */ + +char AMDGPULowerInstructionsPass::ID = 0; + +FunctionPass *llvm::createAMDGPULowerInstructionsPass(TargetMachine &tm) { + return new AMDGPULowerInstructionsPass(tm); +} + +bool AMDGPULowerInstructionsPass::runOnMachineFunction(MachineFunction &MF) +{ + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); + BB != BB_E; ++BB) { + MachineBasicBlock &MBB = *BB; + for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); + I != MBB.end(); I = Next, Next = llvm::next(I) ) { + MachineInstr &MI = *I; + + switch (MI.getOpcode()) { + default: continue; + case AMDIL::VCREATE_v4f32: lowerVCREATE_v4f32(MI, I, MBB, MF); break; + + } + MI.eraseFromParent(); + } + } + return false; +} + +void AMDGPULowerInstructionsPass::lowerVCREATE_v4f32(MachineInstr &MI, + MachineBasicBlock::iterator I, MachineBasicBlock &MBB, MachineFunction &MF) +{ + MachineRegisterInfo & MRI = MF.getRegInfo(); + unsigned tmp = MRI.createVirtualRegister( + MRI.getRegClass(MI.getOperand(0).getReg())); + + BuildMI(MBB, I, DebugLoc(), TM.getInstrInfo()->get(AMDIL::IMPLICIT_DEF), tmp); + + BuildMI(MBB, I, DebugLoc(), TM.getInstrInfo()->get(AMDIL::INSERT_SUBREG)) + .addOperand(MI.getOperand(0)) + .addReg(tmp) + .addOperand(MI.getOperand(1)) + .addImm(AMDIL::sel_x); +} diff --git a/lib/Target/AMDIL/AMDGPULowerShaderInstructions.cpp b/lib/Target/AMDIL/AMDGPULowerShaderInstructions.cpp new file mode 100644 index 00000000000..d33055ccb87 --- /dev/null +++ b/lib/Target/AMDIL/AMDGPULowerShaderInstructions.cpp @@ -0,0 +1,38 @@ +//===-- AMDGPULowerShaderInstructions.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
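+//
+// The shared helper preloadRegister() (defined here) binds a physical
+// register that the hardware preloads to the virtual register the shader
+// reads, roughly:
+//
+//   if (!MRI->isLiveIn(physReg))
+//     entry:  virtReg = COPY physReg    // and mark physReg as a live-in
+//   else
+//     replace virtReg everywhere with the existing live-in vreg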
+// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + + +#include "AMDGPULowerShaderInstructions.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +void AMDGPULowerShaderInstructionsPass::preloadRegister(MachineFunction * MF, + const TargetInstrInfo * TII, unsigned physReg, unsigned virtReg) const +{ + if (!MRI->isLiveIn(physReg)) { + MRI->addLiveIn(physReg, virtReg); + MachineBasicBlock &EntryMBB = MF->front(); + BuildMI(MF->front(), EntryMBB.begin(), DebugLoc(), TII->get(TargetOpcode::COPY), + virtReg) + .addReg(physReg); + } else { + /* We can't mark the same register as preloaded twice, but we still must + * associate virtReg with the correct preloaded register. */ + unsigned newReg = MRI->getLiveInVirtReg(physReg); + MRI->replaceRegWith(virtReg, newReg); + } +} diff --git a/lib/Target/AMDIL/AMDGPULowerShaderInstructions.h b/lib/Target/AMDIL/AMDGPULowerShaderInstructions.h new file mode 100644 index 00000000000..5ee77fafe2b --- /dev/null +++ b/lib/Target/AMDIL/AMDGPULowerShaderInstructions.h @@ -0,0 +1,40 @@ +//===-- AMDGPULowerShaderInstructions.h - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + + +#ifndef AMDGPU_LOWER_SHADER_INSTRUCTIONS +#define AMDGPU_LOWER_SHADER_INSTRUCTIONS + +namespace llvm { + +class MachineFunction; +class MachineRegisterInfo; +class TargetInstrInfo; + +class AMDGPULowerShaderInstructionsPass { + + protected: + MachineRegisterInfo * MRI; + /** + * @param physReg The physical register that will be preloaded. + * @param virtReg The virtual register that currently holds the + * preloaded value. + */ + void preloadRegister(MachineFunction * MF, const TargetInstrInfo * TII, + unsigned physReg, unsigned virtReg) const; +}; + +} // end namespace llvm + + +#endif // AMDGPU_LOWER_SHADER_INSTRUCTIONS diff --git a/lib/Target/AMDIL/AMDGPUReorderPreloadInstructions.cpp b/lib/Target/AMDIL/AMDGPUReorderPreloadInstructions.cpp new file mode 100644 index 00000000000..c923f19c39f --- /dev/null +++ b/lib/Target/AMDIL/AMDGPUReorderPreloadInstructions.cpp @@ -0,0 +1,66 @@ +//===-- AMDGPUReorderPreloadInstructions.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
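+//
+// Illustrative example (hypothetical registers): given an entry block
+//
+//   %x = COPY %SGPR0          ; preload
+//   %y = FADD %a, %b
+//   %z = COPY %SGPR1          ; preload
+//
+// the pass below hoists both preload instructions to the top of the block.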
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDIL.h"
+#include "AMDILInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Function.h"
+
+using namespace llvm;
+
+namespace {
+  class AMDGPUReorderPreloadInstructionsPass : public MachineFunctionPass {
+
+  private:
+    static char ID;
+    TargetMachine &TM;
+
+  public:
+    AMDGPUReorderPreloadInstructionsPass(TargetMachine &tm) :
+      MachineFunctionPass(ID), TM(tm) { }
+
+    bool runOnMachineFunction(MachineFunction &MF);
+
+    const char *getPassName() const { return "AMDGPU Reorder Preload Instructions"; }
+  };
+} /* End anonymous namespace */
+
+char AMDGPUReorderPreloadInstructionsPass::ID = 0;
+
+FunctionPass *llvm::createAMDGPUReorderPreloadInstructionsPass(TargetMachine &tm) {
+  return new AMDGPUReorderPreloadInstructionsPass(tm);
+}
+
+/* This pass moves instructions that represent preloaded registers to the
+ * start of the program. */
+bool AMDGPUReorderPreloadInstructionsPass::runOnMachineFunction(MachineFunction &MF)
+{
+  const AMDGPUInstrInfo *TII =
+      static_cast<const AMDGPUInstrInfo *>(TM.getInstrInfo());
+
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+       BB != BB_E; ++BB) {
+    MachineBasicBlock &MBB = *BB;
+    for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+         I != MBB.end(); I = Next, Next = llvm::next(I)) {
+      MachineInstr &MI = *I;
+      if (TII->isRegPreload(MI)) {
+        MF.front().insert(MF.front().begin(), MI.removeFromParent());
+      }
+    }
+  }
+  return false;
+}
diff --git a/lib/Target/AMDIL/AMDILAlgorithms.tpp b/lib/Target/AMDIL/AMDILAlgorithms.tpp
new file mode 100644
index 00000000000..058475f0f98
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILAlgorithms.tpp
@@ -0,0 +1,93 @@
+//===------ AMDILAlgorithms.tpp - AMDIL Template Algorithms Header --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides template algorithms that extend the STL algorithms but
+// are useful for the AMDIL backend.
+//
+//===----------------------------------------------------------------------===//
+
+// A template function that loops through the iterators and passes the second
+// argument along with each iterator to the function. If the function returns
+// true, then the current iterator is invalidated and it moves back before
+// moving forward to the next iterator; otherwise it moves forward without
+// issue. This is based on the for_each STL function, but allows a reference
+// to the second argument.
+template <class InputIterator, class Function, typename Arg>
+Function binaryForEach(InputIterator First, InputIterator Last, Function F,
+                       Arg &Second)
+{
+  for ( ; First != Last; ++First ) {
+    F(*First, Second);
+  }
+  return F;
+}
+
+template <class InputIterator, class Function, typename Arg>
+Function safeBinaryForEach(InputIterator First, InputIterator Last, Function F,
+                           Arg &Second)
+{
+  for ( ; First != Last; ++First ) {
+    if (F(*First, Second)) {
+      --First;
+    }
+  }
+  return F;
+}
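+
+// Typical usage (illustrative only): count every instruction of a basic
+// block, sharing one counter across all calls by reference:
+//
+//   struct CountInstr {
+//     bool operator()(MachineInstr &MI, unsigned &Count) {
+//       ++Count;
+//       return false; // nothing was invalidated
+//     }
+//   };
+//   unsigned Count = 0;
+//   binaryForEach(MBB.begin(), MBB.end(), CountInstr(), Count);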
+
+// A template function that has two levels of looping before calling the
+// function with the passed in argument. See binaryForEach for further
+// explanation.
+template <class InputIterator, class Function, typename Arg>
+Function binaryNestedForEach(InputIterator First, InputIterator Last,
+                             Function F, Arg &Second)
+{
+  for ( ; First != Last; ++First) {
+    binaryForEach(First->begin(), First->end(), F, Second);
+  }
+  return F;
+}
+
+template <class InputIterator, class Function, typename Arg>
+Function safeBinaryNestedForEach(InputIterator First, InputIterator Last,
+                                 Function F, Arg &Second)
+{
+  for ( ; First != Last; ++First) {
+    safeBinaryForEach(First->begin(), First->end(), F, Second);
+  }
+  return F;
+}
+
+// Unlike the STL, a pointer to the iterator itself is passed in with the
+// 'safe' versions of these functions. This allows the function to handle
+// situations such as invalidated iterators.
+template <class InputIterator, class Function>
+Function safeForEach(InputIterator First, InputIterator Last, Function F)
+{
+  for ( ; First != Last; ++First )
+    F(&First);
+  return F;
+}
+
+// A template function that has two levels of looping before calling the
+// function with a pointer to the current iterator. See binaryForEach for
+// further explanation.
+template <class InputIterator, class SecondIterator, class Function>
+Function safeNestedForEach(InputIterator First, InputIterator Last,
+                           SecondIterator S, Function F)
+{
+  for ( ; First != Last; ++First) {
+    SecondIterator sf, sl;
+    for (sf = First->begin(), sl = First->end();
+         sf != sl; ) {
+      if (!F(&sf)) {
+        ++sf;
+      }
+    }
+  }
+  return F;
+}
diff --git a/lib/Target/AMDIL/AMDILCFGStructurizer.cpp b/lib/Target/AMDIL/AMDILCFGStructurizer.cpp
new file mode 100644
index 00000000000..289af6f210e
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILCFGStructurizer.cpp
@@ -0,0 +1,3250 @@
+//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "structcfg"
+#ifdef DEBUG
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME 0
+#endif
+
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define FirstNonDebugInstr(A) A->begin()
+using namespace llvm;
+
+// bixia TODO: move this out to analysis lib. Make this work for both target
+// AMDIL and CBackend.
+// TODO: move-begin.
+
+//===----------------------------------------------------------------------===//
+//
+// Statistics for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+
+STATISTIC(numSerialPatternMatch,    "CFGStructurizer number of serial pattern "
+    "matched");
+STATISTIC(numIfPatternMatch,        "CFGStructurizer number of if pattern "
+    "matched");
+STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
+    "pattern matched");
+STATISTIC(numLoopcontPatternMatch,  "CFGStructurizer number of loop-continue "
+    "pattern matched");
+STATISTIC(numLoopPatternMatch,      "CFGStructurizer number of loop pattern "
+    "matched");
+STATISTIC(numClonedBlock,           "CFGStructurizer cloned blocks");
+STATISTIC(numClonedInstr,           "CFGStructurizer cloned instructions");
+
+//===----------------------------------------------------------------------===//
+//
+// Miscellaneous utility for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+namespace llvmCFGStruct
+{
+#define SHOWNEWINSTR(i) \
+  if (DEBUGME) errs() << "New instr: " << *i << "\n"
+
+#define SHOWNEWBLK(b, msg) \
+if (DEBUGME) { \
+  errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+  errs() << "\n"; \
+}
+
+#define SHOWBLK_DETAIL(b, msg) \
+if (DEBUGME) { \
+  if (b) { \
+    errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+    b->print(errs()); \
+    errs() << "\n"; \
+  } \
+}
+
+#define INVALIDSCCNUM -1
+#define INVALIDREGNUM 0
+
+template <class LoopinfoT>
+void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
+  for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
+       iterEnd = LoopInfo.end();
+       iter != iterEnd; ++iter) {
+    (*iter)->print(OS, 0);
+  }
+}
+
+template <class NodeT>
+void ReverseVector(SmallVectorImpl<NodeT *> &Src) {
+  size_t sz = Src.size();
+  for (size_t i = 0; i < sz/2; ++i) {
+    NodeT *t = Src[i];
+    Src[i] = Src[sz - i - 1];
+    Src[sz - i - 1] = t;
+  }
+}
+
+} //end namespace llvmCFGStruct
+
+//===----------------------------------------------------------------------===//
+//
+// MachinePostDominatorTree
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Analysis/Dominators.h"
+
+namespace llvm {
+
+/// PostDominatorTree Class - Concrete subclass of DominatorTree that is used
+/// to compute a post-dominator tree.
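+/// Clients query it like the forward dominator tree; e.g. (illustrative),
+/// finding the join point of two exit blocks:
+///
+///   MachineBasicBlock *Join =
+///       PDT->findNearestCommonDominator(ExitA, ExitB);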
+/// +struct MachinePostDominatorTree : public MachineFunctionPass { + static char ID; // Pass identification, replacement for typeid + DominatorTreeBase *DT; + MachinePostDominatorTree() : MachineFunctionPass(ID) + { + DT = new DominatorTreeBase(true); //true indicate + // postdominator + } + + ~MachinePostDominatorTree(); + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + inline const std::vector &getRoots() const { + return DT->getRoots(); + } + + inline MachineDomTreeNode *getRootNode() const { + return DT->getRootNode(); + } + + inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const { + return DT->getNode(BB); + } + + inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const { + return DT->getNode(BB); + } + + inline bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const { + return DT->dominates(A, B); + } + + inline bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const { + return DT->dominates(A, B); + } + + inline bool + properlyDominates(const MachineDomTreeNode *A, MachineDomTreeNode *B) const { + return DT->properlyDominates(A, B); + } + + inline bool + properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const { + return DT->properlyDominates(A, B); + } + + inline MachineBasicBlock * + findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B) { + return DT->findNearestCommonDominator(A, B); + } + + virtual void print(llvm::raw_ostream &OS, const Module *M = 0) const { + DT->print(OS); + } +}; +} //end of namespace llvm + +char MachinePostDominatorTree::ID = 0; +static RegisterPass +machinePostDominatorTreePass("machinepostdomtree", + "MachinePostDominator Tree Construction", + true, true); + +//const PassInfo *const llvm::MachinePostDominatorsID +//= &machinePostDominatorTreePass; + +bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) { + DT->recalculate(F); + //DEBUG(DT->dump()); + return false; +} + +MachinePostDominatorTree::~MachinePostDominatorTree() { + delete DT; +} + +//===----------------------------------------------------------------------===// +// +// supporting data structure for CFGStructurizer +// +//===----------------------------------------------------------------------===// + +namespace llvmCFGStruct +{ +template +struct CFGStructTraits { +}; + +template +class BlockInformation { +public: + bool isRetired; + int sccNum; + //SmallVector succInstr; + //Instructions defining the corresponding successor. + BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {} +}; + +template +class LandInformation { +public: + BlockT *landBlk; + std::set breakInitRegs; //Registers that need to "reg = 0", before + //WHILELOOP(thisloop) init before entering + //thisloop. + std::set contInitRegs; //Registers that need to "reg = 0", after + //WHILELOOP(thisloop) init after entering + //thisloop. + std::set endbranchInitRegs; //Init before entering this loop, at loop + //land block, branch cond on this reg. + std::set breakOnRegs; //registers that need to "if (reg) break + //endif" after ENDLOOP(thisloop) break + //outerLoopOf(thisLoop). + std::set contOnRegs; //registers that need to "if (reg) continue + //endif" after ENDLOOP(thisloop) continue on + //outerLoopOf(thisLoop). 
+ LandInformation() : landBlk(NULL) {} +}; + +} //end of namespace llvmCFGStruct + +//===----------------------------------------------------------------------===// +// +// CFGStructurizer +// +//===----------------------------------------------------------------------===// + +namespace llvmCFGStruct +{ +// bixia TODO: port it to BasicBlock, not just MachineBasicBlock. +template +class CFGStructurizer +{ +public: + typedef enum { + Not_SinglePath = 0, + SinglePath_InPath = 1, + SinglePath_NotInPath = 2 + } PathToKind; + +public: + typedef typename PassT::InstructionType InstrT; + typedef typename PassT::FunctionType FuncT; + typedef typename PassT::DominatortreeType DomTreeT; + typedef typename PassT::PostDominatortreeType PostDomTreeT; + typedef typename PassT::DomTreeNodeType DomTreeNodeT; + typedef typename PassT::LoopinfoType LoopInfoT; + + typedef GraphTraits FuncGTraits; + //typedef FuncGTraits::nodes_iterator BlockIterator; + typedef typename FuncT::iterator BlockIterator; + + typedef typename FuncGTraits::NodeType BlockT; + typedef GraphTraits BlockGTraits; + typedef GraphTraits > InvBlockGTraits; + //typedef BlockGTraits::succ_iterator InstructionIterator; + typedef typename BlockT::iterator InstrIterator; + + typedef CFGStructTraits CFGTraits; + typedef BlockInformation BlockInfo; + typedef std::map BlockInfoMap; + + typedef int RegiT; + typedef typename PassT::LoopType LoopT; + typedef LandInformation LoopLandInfo; + typedef std::map LoopLandInfoMap; + //landing info for loop break + typedef SmallVector BlockTSmallerVector; + +public: + CFGStructurizer(); + ~CFGStructurizer(); + + /// Perform the CFG structurization + bool run(FuncT &Func, PassT &Pass); + + /// Perform the CFG preparation + bool prepare(FuncT &Func, PassT &Pass); + +private: + void orderBlocks(); + void printOrderedBlocks(llvm::raw_ostream &OS); + int patternMatch(BlockT *CurBlock); + int patternMatchGroup(BlockT *CurBlock); + + int serialPatternMatch(BlockT *CurBlock); + int ifPatternMatch(BlockT *CurBlock); + int switchPatternMatch(BlockT *CurBlock); + int loopendPatternMatch(BlockT *CurBlock); + int loopPatternMatch(BlockT *CurBlock); + + int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader); + int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader); + //int loopWithoutBreak(BlockT *); + + void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop, + BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock); + void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop, + BlockT *ContBlock, LoopT *contLoop); + bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block); + int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, + BlockT *FalseBlock); + int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock, + BlockT *FalseBlock); + int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, + BlockT *FalseBlock, BlockT **LandBlockPtr); + void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, + BlockT *FalseBlock, BlockT *LandBlock, + bool Detail = false); + PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock, + bool AllowSideEntry = true); + BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock, + bool AllowSideEntry = true); + int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock); + void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock); + + void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock, + BlockT *TrueBlock, BlockT *FalseBlock, + BlockT *LandBlock); + void mergeLooplandBlock(BlockT *DstBlock, 
LoopLandInfo *LoopLand); + void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock, + BlockT *ExitLandBlock, RegiT SetReg); + void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock, + RegiT SetReg); + BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep, + std::set &ExitBlockSet, + BlockT *ExitLandBlk); + BlockT *addLoopEndbranchBlock(LoopT *LoopRep, + BlockTSmallerVector &ExitingBlocks, + BlockTSmallerVector &ExitBlocks); + BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep); + void removeUnconditionalBranch(BlockT *SrcBlock); + void removeRedundantConditionalBranch(BlockT *SrcBlock); + void addDummyExitBlock(SmallVector &RetBlocks); + + void removeSuccessor(BlockT *SrcBlock); + BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock); + BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock); + + void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock, + InstrIterator InsertPos); + + void recordSccnum(BlockT *SrcBlock, int SCCNum); + int getSCCNum(BlockT *srcBlk); + + void retireBlock(BlockT *DstBlock, BlockT *SrcBlock); + bool isRetiredBlock(BlockT *SrcBlock); + bool isActiveLoophead(BlockT *CurBlock); + bool needMigrateBlock(BlockT *Block); + + BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock, + BlockTSmallerVector &exitBlocks, + std::set &ExitBlockSet); + void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL); + BlockT *getLoopLandBlock(LoopT *LoopRep); + LoopLandInfo *getLoopLandInfo(LoopT *LoopRep); + + void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum); + void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum); + void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum); + void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum); + void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum); + + bool hasBackEdge(BlockT *curBlock); + unsigned getLoopDepth (LoopT *LoopRep); + int countActiveBlock( + typename SmallVector::const_iterator IterStart, + typename SmallVector::const_iterator IterEnd); + BlockT *findNearestCommonPostDom(std::set&); + BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2); + +private: + DomTreeT *domTree; + PostDomTreeT *postDomTree; + LoopInfoT *loopInfo; + PassT *passRep; + FuncT *funcRep; + + BlockInfoMap blockInfoMap; + LoopLandInfoMap loopLandInfoMap; + SmallVector orderedBlks; + +}; //template class CFGStructurizer + +template CFGStructurizer::CFGStructurizer() + : domTree(NULL), postDomTree(NULL), loopInfo(NULL) { +} + +template CFGStructurizer::~CFGStructurizer() { + for (typename BlockInfoMap::iterator I = blockInfoMap.begin(), + E = blockInfoMap.end(); I != E; ++I) { + delete I->second; + } +} + +template +bool CFGStructurizer::prepare(FuncT &func, PassT &pass) { + passRep = &pass; + funcRep = &func; + + bool changed = false; + //func.RenumberBlocks(); + + //to do, if not reducible flow graph, make it so ??? + + if (DEBUGME) { + errs() << "AMDILCFGStructurizer::prepare\n"; + //func.viewCFG(); + //func.viewCFGOnly(); + //func.dump(); + } + + //FIXME: gcc complains on this. + //domTree = &pass.getAnalysis(); + //domTree = CFGTraits::getDominatorTree(pass); + //if (DEBUGME) { + // domTree->print(errs()); + //} + + //FIXME: gcc complains on this. + //domTree = &pass.getAnalysis(); + //postDomTree = CFGTraits::getPostDominatorTree(pass); + //if (DEBUGME) { + // postDomTree->print(errs()); + //} + + //FIXME: gcc complains on this. 
+ //loopInfo = &pass.getAnalysis(); + loopInfo = CFGTraits::getLoopInfo(pass); + if (DEBUGME) { + errs() << "LoopInfo:\n"; + PrintLoopinfo(*loopInfo, errs()); + } + + orderBlocks(); + if (DEBUGME) { + errs() << "Ordered blocks:\n"; + printOrderedBlocks(errs()); + } + + SmallVector retBlks; + + for (typename LoopInfoT::iterator iter = loopInfo->begin(), + iterEnd = loopInfo->end(); + iter != iterEnd; ++iter) { + LoopT* loopRep = (*iter); + BlockTSmallerVector exitingBlks; + loopRep->getExitingBlocks(exitingBlks); + + if (exitingBlks.size() == 0) { + BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep); + if (dummyExitBlk != NULL) + retBlks.push_back(dummyExitBlk); + } + } + + // Remove unconditional branch instr. + // Add dummy exit block iff there are multiple returns. + + for (typename SmallVector::const_iterator + iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end(); + iterBlk != iterEndBlk; + ++iterBlk) { + BlockT *curBlk = *iterBlk; + removeUnconditionalBranch(curBlk); + removeRedundantConditionalBranch(curBlk); + if (CFGTraits::isReturnBlock(curBlk)) { + retBlks.push_back(curBlk); + } + assert(curBlk->succ_size() <= 2); + //assert(curBlk->size() > 0); + //removeEmptyBlock(curBlk) ?? + } //for + + if (retBlks.size() >= 2) { + addDummyExitBlock(retBlks); + changed = true; + } + + return changed; +} //CFGStructurizer::prepare + +template +bool CFGStructurizer::run(FuncT &func, PassT &pass) { + passRep = &pass; + funcRep = &func; + + //func.RenumberBlocks(); + + //Assume reducible CFG... + if (DEBUGME) { + errs() << "AMDILCFGStructurizer::run\n"; + //errs() << func.getFunction()->getNameStr() << "\n"; + func.viewCFG(); + //func.viewCFGOnly(); + //func.dump(); + } + +#if 1 + //FIXME: gcc complains on this. + //domTree = &pass.getAnalysis(); + domTree = CFGTraits::getDominatorTree(pass); + if (DEBUGME) { + domTree->print(errs(), (const llvm::Module*)0); + } +#endif + + //FIXME: gcc complains on this. + //domTree = &pass.getAnalysis(); + postDomTree = CFGTraits::getPostDominatorTree(pass); + if (DEBUGME) { + postDomTree->print(errs()); + } + + //FIXME: gcc complains on this. + //loopInfo = &pass.getAnalysis(); + loopInfo = CFGTraits::getLoopInfo(pass); + if (DEBUGME) { + errs() << "LoopInfo:\n"; + PrintLoopinfo(*loopInfo, errs()); + } + + orderBlocks(); +//#define STRESSTEST +#ifdef STRESSTEST + //Use the worse block ordering to test the algorithm. + ReverseVector(orderedBlks); +#endif + + if (DEBUGME) { + errs() << "Ordered blocks:\n"; + printOrderedBlocks(errs()); + } + int numIter = 0; + bool finish = false; + BlockT *curBlk; + bool makeProgress = false; + int numRemainedBlk = countActiveBlock(orderedBlks.begin(), + orderedBlks.end()); + + do { + ++numIter; + if (DEBUGME) { + errs() << "numIter = " << numIter + << ", numRemaintedBlk = " << numRemainedBlk << "\n"; + } + + typename SmallVector::const_iterator + iterBlk = orderedBlks.begin(); + typename SmallVector::const_iterator + iterBlkEnd = orderedBlks.end(); + + typename SmallVector::const_iterator + sccBeginIter = iterBlk; + BlockT *sccBeginBlk = NULL; + int sccNumBlk = 0; // The number of active blocks, init to a + // maximum possible number. + int sccNumIter; // Number of iteration in this SCC. + + while (iterBlk != iterBlkEnd) { + curBlk = *iterBlk; + + if (sccBeginBlk == NULL) { + sccBeginIter = iterBlk; + sccBeginBlk = curBlk; + sccNumIter = 0; + sccNumBlk = numRemainedBlk; // Init to maximum possible number. 
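+        // This SCC is re-scanned until one pass over it stops reducing the
+        // number of active blocks (see the progress check below).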
+ if (DEBUGME) { + errs() << "start processing SCC" << getSCCNum(sccBeginBlk); + errs() << "\n"; + } + } + + if (!isRetiredBlock(curBlk)) { + patternMatch(curBlk); + } + + ++iterBlk; + + bool contNextScc = true; + if (iterBlk == iterBlkEnd + || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) { + // Just finish one scc. + ++sccNumIter; + int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk); + if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) { + if (DEBUGME) { + errs() << "Can't reduce SCC " << getSCCNum(curBlk) + << ", sccNumIter = " << sccNumIter; + errs() << "doesn't make any progress\n"; + } + contNextScc = true; + } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) { + sccNumBlk = sccRemainedNumBlk; + iterBlk = sccBeginIter; + contNextScc = false; + if (DEBUGME) { + errs() << "repeat processing SCC" << getSCCNum(curBlk) + << "sccNumIter = " << sccNumIter << "\n"; + func.viewCFG(); + //func.viewCFGOnly(); + } + } else { + // Finish the current scc. + contNextScc = true; + } + } else { + // Continue on next component in the current scc. + contNextScc = false; + } + + if (contNextScc) { + sccBeginBlk = NULL; + } + } //while, "one iteration" over the function. + + BlockT *entryBlk = FuncGTraits::nodes_begin(&func); + if (entryBlk->succ_size() == 0) { + finish = true; + if (DEBUGME) { + errs() << "Reduce to one block\n"; + } + } else { + int newnumRemainedBlk + = countActiveBlock(orderedBlks.begin(), orderedBlks.end()); + // consider cloned blocks ?? + if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) { + makeProgress = true; + numRemainedBlk = newnumRemainedBlk; + } else { + makeProgress = false; + if (DEBUGME) { + errs() << "No progress\n"; + } + } + } + } while (!finish && makeProgress); + + // Misc wrap up to maintain the consistency of the Function representation. + CFGTraits::wrapup(FuncGTraits::nodes_begin(&func)); + + // Detach retired Block, release memory. + for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(), + iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) { + if ((*iterMap).second && (*iterMap).second->isRetired) { + assert(((*iterMap).first)->getNumber() != -1); + if (DEBUGME) { + errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n"; + } + (*iterMap).first->eraseFromParent(); //Remove from the parent Function. + } + delete (*iterMap).second; + } + blockInfoMap.clear(); + + // clear loopLandInfoMap + for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(), + iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) { + delete (*iterMap).second; + } + loopLandInfoMap.clear(); + + if (DEBUGME) { + func.viewCFG(); + //func.dump(); + } + + if (!finish) { + assert(!"IRREDUCIBL_CF"); + } + + return true; +} //CFGStructurizer::run + +/// Print the ordered Blocks. 
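+/// Each entry has the form BB<number>(<sccNum>,<size>), ten entries per
+/// line, e.g.:
+///
+///   BB0(0,4) BB1(0,7) BB2(1,3) ...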
+/// +template +void CFGStructurizer::printOrderedBlocks(llvm::raw_ostream &os) { + size_t i = 0; + for (typename SmallVector::const_iterator + iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end(); + iterBlk != iterBlkEnd; + ++iterBlk, ++i) { + os << "BB" << (*iterBlk)->getNumber(); + os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")"; + if (i != 0 && i % 10 == 0) { + os << "\n"; + } else { + os << " "; + } + } +} //printOrderedBlocks + +/// Compute the reversed DFS post order of Blocks +/// +template void CFGStructurizer::orderBlocks() { + int sccNum = 0; + BlockT *bb; + for (scc_iterator sccIter = scc_begin(funcRep), + sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) { + std::vector &sccNext = *sccIter; + for (typename std::vector::const_iterator + blockIter = sccNext.begin(), blockEnd = sccNext.end(); + blockIter != blockEnd; ++blockIter) { + bb = *blockIter; + orderedBlks.push_back(bb); + recordSccnum(bb, sccNum); + } + } + + //walk through all the block in func to check for unreachable + for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep), + blockEnd1 = FuncGTraits::nodes_end(funcRep); + blockIter1 != blockEnd1; ++blockIter1) { + BlockT *bb = &(*blockIter1); + sccNum = getSCCNum(bb); + if (sccNum == INVALIDSCCNUM) { + errs() << "unreachable block BB" << bb->getNumber() << "\n"; + } + } //end of for +} //orderBlocks + +template int CFGStructurizer::patternMatch(BlockT *curBlk) { + int numMatch = 0; + int curMatch; + + if (DEBUGME) { + errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n"; + } + + while ((curMatch = patternMatchGroup(curBlk)) > 0) { + numMatch += curMatch; + } + + if (DEBUGME) { + errs() << "End patternMatch BB" << curBlk->getNumber() + << ", numMatch = " << numMatch << "\n"; + } + + return numMatch; +} //patternMatch + +template +int CFGStructurizer::patternMatchGroup(BlockT *curBlk) { + int numMatch = 0; + numMatch += serialPatternMatch(curBlk); + numMatch += ifPatternMatch(curBlk); + //numMatch += switchPatternMatch(curBlk); + numMatch += loopendPatternMatch(curBlk); + numMatch += loopPatternMatch(curBlk); + return numMatch; +}//patternMatchGroup + +template +int CFGStructurizer::serialPatternMatch(BlockT *curBlk) { + if (curBlk->succ_size() != 1) { + return 0; + } + + BlockT *childBlk = *curBlk->succ_begin(); + if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) { + return 0; + } + + mergeSerialBlock(curBlk, childBlk); + ++numSerialPatternMatch; + return 1; +} //serialPatternMatch + +template +int CFGStructurizer::ifPatternMatch(BlockT *curBlk) { + //two edges + if (curBlk->succ_size() != 2) { + return 0; + } + + if (hasBackEdge(curBlk)) { + return 0; + } + + InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk); + if (branchInstr == NULL) { + return 0; + } + + assert(CFGTraits::isCondBranch(branchInstr)); + + BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr); + BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr); + BlockT *landBlk; + int cloned = 0; + + // TODO: Simplify + if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1 + && *trueBlk->succ_begin() == *falseBlk->succ_begin()) { + landBlk = *trueBlk->succ_begin(); + } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) { + landBlk = NULL; + } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) { + landBlk = falseBlk; + falseBlk = NULL; + } else if (falseBlk->succ_size() == 1 + && *falseBlk->succ_begin() == trueBlk) { + landBlk = trueBlk; + trueBlk = NULL; + } 
else if (falseBlk->succ_size() == 1 + && isSameloopDetachedContbreak(trueBlk, falseBlk)) { + landBlk = *falseBlk->succ_begin(); + } else if (trueBlk->succ_size() == 1 + && isSameloopDetachedContbreak(falseBlk, trueBlk)) { + landBlk = *trueBlk->succ_begin(); + } else { + return handleJumpintoIf(curBlk, trueBlk, falseBlk); + } + + // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the + // new BB created for landBlk==NULL may introduce new challenge to the + // reduction process. + if (landBlk != NULL && + ((trueBlk && trueBlk->pred_size() > 1) + || (falseBlk && falseBlk->pred_size() > 1))) { + cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk); + } + + if (trueBlk && trueBlk->pred_size() > 1) { + trueBlk = cloneBlockForPredecessor(trueBlk, curBlk); + ++cloned; + } + + if (falseBlk && falseBlk->pred_size() > 1) { + falseBlk = cloneBlockForPredecessor(falseBlk, curBlk); + ++cloned; + } + + mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk); + + ++numIfPatternMatch; + + numClonedBlock += cloned; + + return 1 + cloned; +} //ifPatternMatch + +template +int CFGStructurizer::switchPatternMatch(BlockT *curBlk) { + return 0; +} //switchPatternMatch + +template +int CFGStructurizer::loopendPatternMatch(BlockT *curBlk) { + LoopT *loopRep = loopInfo->getLoopFor(curBlk); + typename std::vector nestedLoops; + while (loopRep) { + nestedLoops.push_back(loopRep); + loopRep = loopRep->getParentLoop(); + } + + if (nestedLoops.size() == 0) { + return 0; + } + + // Process nested loop outside->inside, so "continue" to a outside loop won't + // be mistaken as "break" of the current loop. + int num = 0; + for (typename std::vector::reverse_iterator + iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend(); + iter != iterEnd; ++iter) { + loopRep = *iter; + + if (getLoopLandBlock(loopRep) != NULL) { + continue; + } + + BlockT *loopHeader = loopRep->getHeader(); + + int numBreak = loopbreakPatternMatch(loopRep, loopHeader); + + if (numBreak == -1) { + break; + } + + int numCont = loopcontPatternMatch(loopRep, loopHeader); + num += numBreak + numCont; + } + + return num; +} //loopendPatternMatch + +template +int CFGStructurizer::loopPatternMatch(BlockT *curBlk) { + if (curBlk->succ_size() != 0) { + return 0; + } + + int numLoop = 0; + LoopT *loopRep = loopInfo->getLoopFor(curBlk); + while (loopRep && loopRep->getHeader() == curBlk) { + LoopLandInfo *loopLand = getLoopLandInfo(loopRep); + if (loopLand) { + BlockT *landBlk = loopLand->landBlk; + assert(landBlk); + if (!isRetiredBlock(landBlk)) { + mergeLooplandBlock(curBlk, loopLand); + ++numLoop; + } + } + loopRep = loopRep->getParentLoop(); + } + + numLoopPatternMatch += numLoop; + + return numLoop; +} //loopPatternMatch + +template +int CFGStructurizer::loopbreakPatternMatch(LoopT *loopRep, + BlockT *loopHeader) { + BlockTSmallerVector exitingBlks; + loopRep->getExitingBlocks(exitingBlks); + + if (DEBUGME) { + errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n"; + } + + if (exitingBlks.size() == 0) { + setLoopLandBlock(loopRep); + return 0; + } + + // Compute the corresponding exitBlks and exit block set. 
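+  // (An "exiting" block lives inside the loop and has a successor outside
+  // it; that successor is its "exit" block. E.g. in
+  //   while (...) { if (c) goto out; ... }
+  // the block testing c is exiting, and out is its exit block.)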
+ BlockTSmallerVector exitBlks; + std::set exitBlkSet; + for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(), + iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) { + BlockT *exitingBlk = *iter; + BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk); + exitBlks.push_back(exitBlk); + exitBlkSet.insert(exitBlk); //non-duplicate insert + } + + assert(exitBlkSet.size() > 0); + assert(exitBlks.size() == exitingBlks.size()); + + if (DEBUGME) { + errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n"; + } + + // Find exitLandBlk. + BlockT *exitLandBlk = NULL; + int numCloned = 0; + int numSerial = 0; + + if (exitBlkSet.size() == 1) + { + exitLandBlk = *exitBlkSet.begin(); + } else { + exitLandBlk = findNearestCommonPostDom(exitBlkSet); + + if (exitLandBlk == NULL) { + return -1; + } + + bool allInPath = true; + bool allNotInPath = true; + for (typename std::set::const_iterator + iter = exitBlkSet.begin(), + iterEnd = exitBlkSet.end(); + iter != iterEnd; ++iter) { + BlockT *exitBlk = *iter; + + PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true); + if (DEBUGME) { + errs() << "BB" << exitBlk->getNumber() + << " to BB" << exitLandBlk->getNumber() << " PathToKind=" + << pathKind << "\n"; + } + + allInPath = allInPath && (pathKind == SinglePath_InPath); + allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath); + + if (!allInPath && !allNotInPath) { + if (DEBUGME) { + errs() << "singlePath check fail\n"; + } + return -1; + } + } // check all exit blocks + + if (allNotInPath) { +#if 1 + + // TODO: Simplify, maybe separate function? + //funcRep->viewCFG(); + LoopT *parentLoopRep = loopRep->getParentLoop(); + BlockT *parentLoopHeader = NULL; + if (parentLoopRep) + parentLoopHeader = parentLoopRep->getHeader(); + + if (exitLandBlk == parentLoopHeader && + (exitLandBlk = relocateLoopcontBlock(parentLoopRep, + loopRep, + exitBlkSet, + exitLandBlk)) != NULL) { + if (DEBUGME) { + errs() << "relocateLoopcontBlock success\n"; + } + } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep, + exitingBlks, + exitBlks)) != NULL) { + if (DEBUGME) { + errs() << "insertEndbranchBlock success\n"; + } + } else { + if (DEBUGME) { + errs() << "loop exit fail\n"; + } + return -1; + } +#else + return -1; +#endif + } + + // Handle side entry to exit path. 
+ exitBlks.clear(); + exitBlkSet.clear(); + for (typename BlockTSmallerVector::iterator iterExiting = + exitingBlks.begin(), + iterExitingEnd = exitingBlks.end(); + iterExiting != iterExitingEnd; ++iterExiting) { + BlockT *exitingBlk = *iterExiting; + BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk); + BlockT *newExitBlk = exitBlk; + + if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) { + newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk); + ++numCloned; + } + + numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk); + + exitBlks.push_back(newExitBlk); + exitBlkSet.insert(newExitBlk); + } + + for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(), + iterExitEnd = exitBlks.end(); + iterExit != iterExitEnd; ++iterExit) { + BlockT *exitBlk = *iterExit; + numSerial += serialPatternMatch(exitBlk); + } + + for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(), + iterExitEnd = exitBlks.end(); + iterExit != iterExitEnd; ++iterExit) { + BlockT *exitBlk = *iterExit; + if (exitBlk->pred_size() > 1) { + if (exitBlk != exitLandBlk) { + return -1; + } + } else { + if (exitBlk != exitLandBlk && + (exitBlk->succ_size() != 1 || + *exitBlk->succ_begin() != exitLandBlk)) { + return -1; + } + } + } + } // else + + // LoopT *exitLandLoop = loopInfo->getLoopFor(exitLandBlk); + exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet); + + // Fold break into the breaking block. Leverage across level breaks. + assert(exitingBlks.size() == exitBlks.size()); + for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(), + iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end(); + iterExit != iterExitEnd; ++iterExit, ++iterExiting) { + BlockT *exitBlk = *iterExit; + BlockT *exitingBlk = *iterExiting; + assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk); + LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk); + handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk); + } + + int numBreak = static_cast(exitingBlks.size()); + numLoopbreakPatternMatch += numBreak; + numClonedBlock += numCloned; + return numBreak + numSerial + numCloned; +} //loopbreakPatternMatch + +template +int CFGStructurizer::loopcontPatternMatch(LoopT *loopRep, + BlockT *loopHeader) { + int numCont = 0; + SmallVector contBlk; + for (typename InvBlockGTraits::ChildIteratorType iter = + InvBlockGTraits::child_begin(loopHeader), + iterEnd = InvBlockGTraits::child_end(loopHeader); + iter != iterEnd; ++iter) { + BlockT *curBlk = *iter; + if (loopRep->contains(curBlk)) { + handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk), + loopHeader, loopRep); + contBlk.push_back(curBlk); + ++numCont; + } + } + + for (typename SmallVector::iterator + iter = contBlk.begin(), iterEnd = contBlk.end(); + iter != iterEnd; ++iter) { + (*iter)->removeSuccessor(loopHeader); + } + + numLoopcontPatternMatch += numCont; + + return numCont; +} //loopcontPatternMatch + + +template +bool CFGStructurizer::isSameloopDetachedContbreak(BlockT *src1Blk, + BlockT *src2Blk) { + // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the + // same loop with LoopLandInfo without explicitly keeping track of + // loopContBlks and loopBreakBlks, this is a method to get the information. 
+ // + if (src1Blk->succ_size() == 0) { + LoopT *loopRep = loopInfo->getLoopFor(src1Blk); + if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + if (theEntry != NULL) { + if (DEBUGME) { + errs() << "isLoopContBreakBlock yes src1 = BB" + << src1Blk->getNumber() + << " src2 = BB" << src2Blk->getNumber() << "\n"; + } + return true; + } + } + } + return false; +} //isSameloopDetachedContbreak + +template +int CFGStructurizer::handleJumpintoIf(BlockT *headBlk, + BlockT *trueBlk, + BlockT *falseBlk) { + int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk); + if (num == 0) { + if (DEBUGME) { + errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n"; + } + num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk); + } + return num; +} + +template +int CFGStructurizer::handleJumpintoIfImp(BlockT *headBlk, + BlockT *trueBlk, + BlockT *falseBlk) { + int num = 0; + BlockT *downBlk; + + //trueBlk could be the common post dominator + downBlk = trueBlk; + + if (DEBUGME) { + errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber() + << " true = BB" << trueBlk->getNumber() + << ", numSucc=" << trueBlk->succ_size() + << " false = BB" << falseBlk->getNumber() << "\n"; + } + + while (downBlk) { + if (DEBUGME) { + errs() << "check down = BB" << downBlk->getNumber(); + } + + if (//postDomTree->dominates(downBlk, falseBlk) && + singlePathTo(falseBlk, downBlk) == SinglePath_InPath) { + if (DEBUGME) { + errs() << " working\n"; + } + + num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk); + num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk); + + numClonedBlock += num; + num += serialPatternMatch(*headBlk->succ_begin()); + num += serialPatternMatch(*(++headBlk->succ_begin())); + num += ifPatternMatch(headBlk); + assert(num > 0); // + + break; + } + if (DEBUGME) { + errs() << " not working\n"; + } + downBlk = (downBlk->succ_size() == 1) ? 
(*downBlk->succ_begin()) : NULL; + } // walk down the postDomTree + + return num; +} //handleJumpintoIf + +template +void CFGStructurizer::showImproveSimpleJumpintoIf(BlockT *headBlk, + BlockT *trueBlk, + BlockT *falseBlk, + BlockT *landBlk, + bool detail) { + errs() << "head = BB" << headBlk->getNumber() + << " size = " << headBlk->size(); + if (detail) { + errs() << "\n"; + headBlk->print(errs()); + errs() << "\n"; + } + + if (trueBlk) { + errs() << ", true = BB" << trueBlk->getNumber() << " size = " + << trueBlk->size() << " numPred = " << trueBlk->pred_size(); + if (detail) { + errs() << "\n"; + trueBlk->print(errs()); + errs() << "\n"; + } + } + if (falseBlk) { + errs() << ", false = BB" << falseBlk->getNumber() << " size = " + << falseBlk->size() << " numPred = " << falseBlk->pred_size(); + if (detail) { + errs() << "\n"; + falseBlk->print(errs()); + errs() << "\n"; + } + } + if (landBlk) { + errs() << ", land = BB" << landBlk->getNumber() << " size = " + << landBlk->size() << " numPred = " << landBlk->pred_size(); + if (detail) { + errs() << "\n"; + landBlk->print(errs()); + errs() << "\n"; + } + } + + errs() << "\n"; +} //showImproveSimpleJumpintoIf + +template +int CFGStructurizer::improveSimpleJumpintoIf(BlockT *headBlk, + BlockT *trueBlk, + BlockT *falseBlk, + BlockT **plandBlk) { + bool migrateTrue = false; + bool migrateFalse = false; + + BlockT *landBlk = *plandBlk; + + assert((trueBlk == NULL || trueBlk->succ_size() <= 1) + && (falseBlk == NULL || falseBlk->succ_size() <= 1)); + + if (trueBlk == falseBlk) { + return 0; + } + +#if 0 + if (DEBUGME) { + errs() << "improveSimpleJumpintoIf: "; + showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); + } +#endif + + // unsigned landPredSize = landBlk ? landBlk->pred_size() : 0; + // May consider the # landBlk->pred_size() as it represents the number of + // assignment initReg = .. needed to insert. + migrateTrue = needMigrateBlock(trueBlk); + migrateFalse = needMigrateBlock(falseBlk); + + if (!migrateTrue && !migrateFalse) { + return 0; + } + + // If we need to migrate either trueBlk and falseBlk, migrate the rest that + // have more than one predecessors. without doing this, its predecessor + // rather than headBlk will have undefined value in initReg. + if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) { + migrateTrue = true; + } + if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) { + migrateFalse = true; + } + + if (DEBUGME) { + errs() << "before improveSimpleJumpintoIf: "; + showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); + //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1); + } + + // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk + // + // new: headBlk => if () {initReg = 1; org trueBlk branch} else + // {initReg = 0; org falseBlk branch } + // => landBlk => if (initReg) {org trueBlk} else {org falseBlk} + // => org landBlk + // if landBlk->pred_size() > 2, put the about if-else inside + // if (initReg !=2) {...} + // + // add initReg = initVal to headBlk + unsigned initReg = + funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass); + if (!migrateTrue || !migrateFalse) { + int initVal = migrateTrue ? 
0 : 1; + CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal); + } + + int numNewBlk = 0; + + if (landBlk == NULL) { + landBlk = funcRep->CreateMachineBasicBlock(); + funcRep->push_back(landBlk); //insert to function + + if (trueBlk) { + trueBlk->addSuccessor(landBlk); + } else { + headBlk->addSuccessor(landBlk); + } + + if (falseBlk) { + falseBlk->addSuccessor(landBlk); + } else { + headBlk->addSuccessor(landBlk); + } + + numNewBlk ++; + } + + bool landBlkHasOtherPred = (landBlk->pred_size() > 2); + + //insert AMDIL::ENDIF to avoid special case "input landBlk == NULL" + typename BlockT::iterator insertPos = + CFGTraits::getInstrPos + (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDIL::ENDIF, passRep)); + + if (landBlkHasOtherPred) { + unsigned immReg = + funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass); + CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2); + unsigned cmpResReg = + funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass); + + CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg, + initReg, immReg); + CFGTraits::insertCondBranchBefore(landBlk, insertPos, + AMDIL::IF_LOGICALZ_i32, passRep, + cmpResReg, DebugLoc()); + } + + CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDIL::IF_LOGICALNZ_i32, + passRep, initReg, DebugLoc()); + + if (migrateTrue) { + migrateInstruction(trueBlk, landBlk, insertPos); + // need to uncondionally insert the assignment to ensure a path from its + // predecessor rather than headBlk has valid value in initReg if + // (initVal != 1). + CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1); + } + CFGTraits::insertInstrBefore(insertPos, AMDIL::ELSE, passRep); + + if (migrateFalse) { + migrateInstruction(falseBlk, landBlk, insertPos); + // need to uncondionally insert the assignment to ensure a path from its + // predecessor rather than headBlk has valid value in initReg if + // (initVal != 0) + CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0); + } + //CFGTraits::insertInstrBefore(insertPos, AMDIL::ENDIF, passRep); + + if (landBlkHasOtherPred) { + // add endif + CFGTraits::insertInstrBefore(insertPos, AMDIL::ENDIF, passRep); + + // put initReg = 2 to other predecessors of landBlk + for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(), + predIterEnd = landBlk->pred_end(); predIter != predIterEnd; + ++predIter) { + BlockT *curBlk = *predIter; + if (curBlk != trueBlk && curBlk != falseBlk) { + CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2); + } + } //for + } + if (DEBUGME) { + errs() << "result from improveSimpleJumpintoIf: "; + showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); + //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1); + } + + // update landBlk + *plandBlk = landBlk; + + return numNewBlk; +} //improveSimpleJumpintoIf + +template +void CFGStructurizer::handleLoopbreak(BlockT *exitingBlk, + LoopT *exitingLoop, + BlockT *exitBlk, + LoopT *exitLoop, + BlockT *landBlk) { + if (DEBUGME) { + errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop) + << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n"; + } + + RegiT initReg = INVALIDREGNUM; + if (exitingLoop != exitLoop) { + initReg = static_cast + (funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass)); + assert(initReg != INVALIDREGNUM); + addLoopBreakInitReg(exitLoop, initReg); + while (exitingLoop != exitLoop && exitingLoop) { + addLoopBreakOnReg(exitingLoop, initReg); + 
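+      // Every loop level between the exiting block's loop and the loop being
+      // exited gets an "if (initReg) break" after its ENDLOOP, so the break
+      // propagates outward one nesting level at a time.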
exitingLoop = exitingLoop->getParentLoop(); + } + assert(exitingLoop == exitLoop); + } + + mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg); + +} //handleLoopbreak + +template +void CFGStructurizer::handleLoopcontBlock(BlockT *contingBlk, + LoopT *contingLoop, + BlockT *contBlk, + LoopT *contLoop) { + if (DEBUGME) { + errs() << "loopcontPattern cont = BB" << contingBlk->getNumber() + << " header = BB" << contBlk->getNumber() << "\n"; + + errs() << "Trying to continue loop-depth = " + << getLoopDepth(contLoop) + << " from loop-depth = " << getLoopDepth(contingLoop) << "\n"; + } + + RegiT initReg = INVALIDREGNUM; + if (contingLoop != contLoop) { + initReg = static_cast + (funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass)); + assert(initReg != INVALIDREGNUM); + addLoopContInitReg(contLoop, initReg); + while (contingLoop && contingLoop->getParentLoop() != contLoop) { + addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg + contingLoop = contingLoop->getParentLoop(); + } + assert(contingLoop && contingLoop->getParentLoop() == contLoop); + addLoopContOnReg(contingLoop, initReg); + } + + settleLoopcontBlock(contingBlk, contBlk, initReg); + //contingBlk->removeSuccessor(loopHeader); +} //handleLoopcontBlock + +template +void CFGStructurizer::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) { + if (DEBUGME) { + errs() << "serialPattern BB" << dstBlk->getNumber() + << " <= BB" << srcBlk->getNumber() << "\n"; + } + //removeUnconditionalBranch(dstBlk); + dstBlk->splice(dstBlk->end(), srcBlk, FirstNonDebugInstr(srcBlk), srcBlk->end()); + + dstBlk->removeSuccessor(srcBlk); + CFGTraits::cloneSuccessorList(dstBlk, srcBlk); + + removeSuccessor(srcBlk); + retireBlock(dstBlk, srcBlk); +} //mergeSerialBlock + +template +void CFGStructurizer::mergeIfthenelseBlock(InstrT *branchInstr, + BlockT *curBlk, + BlockT *trueBlk, + BlockT *falseBlk, + BlockT *landBlk) { + if (DEBUGME) { + errs() << "ifPattern BB" << curBlk->getNumber(); + errs() << "{ "; + if (trueBlk) { + errs() << "BB" << trueBlk->getNumber(); + } + errs() << " } else "; + errs() << "{ "; + if (falseBlk) { + errs() << "BB" << falseBlk->getNumber(); + } + errs() << " }\n "; + errs() << "landBlock: "; + if (landBlk == NULL) { + errs() << "NULL"; + } else { + errs() << "BB" << landBlk->getNumber(); + } + errs() << "\n"; + } + + int oldOpcode = branchInstr->getOpcode(); + DebugLoc branchDL = branchInstr->getDebugLoc(); + +// transform to +// if cond +// trueBlk +// else +// falseBlk +// endif +// landBlk + + typename BlockT::iterator branchInstrPos = + CFGTraits::getInstrPos(curBlk, branchInstr); + CFGTraits::insertCondBranchBefore(branchInstrPos, + CFGTraits::getBranchNzeroOpcode(oldOpcode), + passRep, + branchDL); + + if (trueBlk) { + curBlk->splice(branchInstrPos, trueBlk, FirstNonDebugInstr(trueBlk), trueBlk->end()); + curBlk->removeSuccessor(trueBlk); + if (landBlk && trueBlk->succ_size()!=0) { + trueBlk->removeSuccessor(landBlk); + } + retireBlock(curBlk, trueBlk); + } + CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::ELSE, passRep); + + if (falseBlk) { + curBlk->splice(branchInstrPos, falseBlk, FirstNonDebugInstr(falseBlk), + falseBlk->end()); + curBlk->removeSuccessor(falseBlk); + if (landBlk && falseBlk->succ_size() != 0) { + falseBlk->removeSuccessor(landBlk); + } + retireBlock(curBlk, falseBlk); + } + CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::ENDIF, passRep); + + //curBlk->remove(branchInstrPos); + branchInstr->eraseFromParent(); + + if (landBlk && trueBlk && falseBlk) { + 
curBlk->addSuccessor(landBlk);
+  }
+
+} //mergeIfthenelseBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
+                                                LoopLandInfo *loopLand) {
+  BlockT *landBlk = loopLand->landBlk;
+
+  if (DEBUGME) {
+    errs() << "loopPattern header = BB" << dstBlk->getNumber()
+           << " land = BB" << landBlk->getNumber() << "\n";
+  }
+
+  // Loop contInitRegs are initialized at the beginning of the loop.
+  for (typename std::set<RegiT>::const_iterator iter =
+         loopLand->contInitRegs.begin(),
+       iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
+    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+  }
+
+  /* We last inserted the DebugLoc in the
+   * BREAK_LOGICALZ_i32 or AMDIL::BREAK_LOGICALNZ statement in the current dstBlk.
+   * Search for the DebugLoc in that statement;
+   * if not found, we have to insert the empty/default DebugLoc. */
+  InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
+  DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
+
+  CFGTraits::insertInstrBefore(dstBlk, AMDIL::WHILELOOP, passRep, DLBreak);
+  // Loop breakInitRegs are initialized before entering the loop.
+  for (typename std::set<RegiT>::const_iterator iter =
+         loopLand->breakInitRegs.begin(),
+       iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter)
+  {
+    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+  }
+  // Loop endbranchInitRegs are initialized before entering the loop.
+  for (typename std::set<RegiT>::const_iterator iter =
+         loopLand->endbranchInitRegs.begin(),
+       iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
+    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+  }
+
+  /* We last inserted the DebugLoc in the continue statement in the current dstBlk.
+   * Search for the DebugLoc in the continue statement;
+   * if not found, we have to insert the empty/default DebugLoc. */
+  InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
+  DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
+
+  CFGTraits::insertInstrEnd(dstBlk, AMDIL::ENDLOOP, passRep, DLContinue);
+  // Loop breakOnRegs are checked after the ENDLOOP: they break out of the loop
+  // enclosing this loop.
+  for (typename std::set<RegiT>::const_iterator iter =
+         loopLand->breakOnRegs.begin(),
+       iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
+    CFGTraits::insertCondBranchEnd(dstBlk, AMDIL::BREAK_LOGICALNZ_i32, passRep,
+                                   *iter);
+  }
+
+  // Loop contOnRegs are checked after the ENDLOOP: they continue the loop
+  // enclosing this loop.
+  for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
+       iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
+    CFGTraits::insertCondBranchEnd(dstBlk, AMDIL::CONTINUE_LOGICALNZ_i32,
+                                   passRep, *iter);
+  }
+
+  dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
+
+  for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
+       iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
+    dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of. 
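+    // Note: addSuccessor also registers dstBlk in *iter's predecessor list,
+    // which is why no explicit predecessor update is needed here.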
+ } + + removeSuccessor(landBlk); + retireBlock(dstBlk, landBlk); +} //mergeLooplandBlock + +template +void CFGStructurizer::mergeLoopbreakBlock(BlockT *exitingBlk, + BlockT *exitBlk, + BlockT *exitLandBlk, + RegiT setReg) { + if (DEBUGME) { + errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber() + << " exit = BB" << exitBlk->getNumber() + << " land = BB" << exitLandBlk->getNumber() << "\n"; + } + + InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk); + assert(branchInstr && CFGTraits::isCondBranch(branchInstr)); + + DebugLoc DL = branchInstr->getDebugLoc(); + + BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr); + int oldOpcode = branchInstr->getOpcode(); + + // transform exitingBlk to + // if ( ) { + // exitBlk (if exitBlk != exitLandBlk) + // setReg = 1 + // break + // }endif + // successor = {orgSuccessor(exitingBlk) - exitBlk} + + typename BlockT::iterator branchInstrPos = + CFGTraits::getInstrPos(exitingBlk, branchInstr); + + if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) { + //break_logical + int newOpcode = + (trueBranch == exitBlk) ? CFGTraits::getBreakNzeroOpcode(oldOpcode) + : CFGTraits::getBreakZeroOpcode(oldOpcode); + CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL); + } else { + int newOpcode = + (trueBranch == exitBlk) ? CFGTraits::getBranchNzeroOpcode(oldOpcode) + : CFGTraits::getBranchZeroOpcode(oldOpcode); + CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL); + if (exitBlk != exitLandBlk) { + //splice is insert-before ... + exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(), + exitBlk->end()); + } + if (setReg != INVALIDREGNUM) { + CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1); + } + CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::BREAK, passRep); + CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::ENDIF, passRep); + } //if_logical + + //now branchInst can be erase safely + //exitingBlk->eraseFromParent(branchInstr); + branchInstr->eraseFromParent(); + + //now take care of successors, retire blocks + exitingBlk->removeSuccessor(exitBlk); + if (exitBlk != exitLandBlk) { + //splice is insert-before ... + exitBlk->removeSuccessor(exitLandBlk); + retireBlock(exitingBlk, exitBlk); + } + +} //mergeLoopbreakBlock + +template +void CFGStructurizer::settleLoopcontBlock(BlockT *contingBlk, + BlockT *contBlk, + RegiT setReg) { + if (DEBUGME) { + errs() << "settleLoopcontBlock conting = BB" + << contingBlk->getNumber() + << ", cont = BB" << contBlk->getNumber() << "\n"; + } + + InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk); + if (branchInstr) { + assert(CFGTraits::isCondBranch(branchInstr)); + typename BlockT::iterator branchInstrPos = + CFGTraits::getInstrPos(contingBlk, branchInstr); + BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr); + int oldOpcode = branchInstr->getOpcode(); + DebugLoc DL = branchInstr->getDebugLoc(); + + // transform contingBlk to + // if () { + // move instr after branchInstr + // continue + // or + // setReg = 1 + // break + // }endif + // successor = {orgSuccessor(contingBlk) - loopHeader} + + bool useContinueLogical = + (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr); + + if (useContinueLogical == false) + { + int branchOpcode = + trueBranch == contBlk ? 
CFGTraits::getBranchNzeroOpcode(oldOpcode) + : CFGTraits::getBranchZeroOpcode(oldOpcode); + + CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL); + + if (setReg != INVALIDREGNUM) { + CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1); + // insertEnd to ensure phi-moves, if exist, go before the continue-instr. + CFGTraits::insertInstrEnd(contingBlk, AMDIL::BREAK, passRep, DL); + } else { + // insertEnd to ensure phi-moves, if exist, go before the continue-instr. + CFGTraits::insertInstrEnd(contingBlk, AMDIL::CONTINUE, passRep, DL); + } + + CFGTraits::insertInstrEnd(contingBlk, AMDIL::ENDIF, passRep, DL); + } else { + int branchOpcode = + trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode) + : CFGTraits::getContinueZeroOpcode(oldOpcode); + + CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL); + } + + //contingBlk->eraseFromParent(branchInstr); + branchInstr->eraseFromParent(); + } else { + /* if we've arrived here then we've already erased the branch instruction + * travel back up the basic block to see the last reference of our debug location + * we've just inserted that reference here so it should be representative */ + if (setReg != INVALIDREGNUM) { + CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1); + // insertEnd to ensure phi-moves, if exist, go before the continue-instr. + CFGTraits::insertInstrEnd(contingBlk, AMDIL::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk)); + } else { + // insertEnd to ensure phi-moves, if exist, go before the continue-instr. + CFGTraits::insertInstrEnd(contingBlk, AMDIL::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk)); + } + } //else + +} //settleLoopcontBlock + +// BBs in exitBlkSet are determined as in break-path for loopRep, +// before we can put code for BBs as inside loop-body for loopRep +// check whether those BBs are determined as cont-BB for parentLoopRep +// earlier. +// If so, generate a new BB newBlk +// (1) set newBlk common successor of BBs in exitBlkSet +// (2) change the continue-instr in BBs in exitBlkSet to break-instr +// (3) generate continue-instr in newBlk +// +template +typename CFGStructurizer::BlockT * +CFGStructurizer::relocateLoopcontBlock(LoopT *parentLoopRep, + LoopT *loopRep, + std::set &exitBlkSet, + BlockT *exitLandBlk) { + std::set endBlkSet; + +// BlockT *parentLoopHead = parentLoopRep->getHeader(); + + + for (typename std::set::const_iterator iter = exitBlkSet.begin(), + iterEnd = exitBlkSet.end(); + iter != iterEnd; ++iter) { + BlockT *exitBlk = *iter; + BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk); + + if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL) + return NULL; + + endBlkSet.insert(endBlk); + } + + BlockT *newBlk = funcRep->CreateMachineBasicBlock(); + funcRep->push_back(newBlk); //insert to function + CFGTraits::insertInstrEnd(newBlk, AMDIL::CONTINUE, passRep); + SHOWNEWBLK(newBlk, "New continue block: "); + + for (typename std::set::const_iterator iter = endBlkSet.begin(), + iterEnd = endBlkSet.end(); + iter != iterEnd; ++iter) { + BlockT *endBlk = *iter; + InstrT *contInstr = CFGTraits::getContinueInstr(endBlk); + if (contInstr) { + contInstr->eraseFromParent(); + } + endBlk->addSuccessor(newBlk); + if (DEBUGME) { + errs() << "Add new continue Block to BB" + << endBlk->getNumber() << " successors\n"; + } + } + + return newBlk; +} //relocateLoopcontBlock + + +// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as +// LoopLandBlock. 
This BB branches on the loop endBranchInit register to the
+// paths corresponding to the loop exiting branches.
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
+                                              BlockTSmallerVector &exitingBlks,
+                                              BlockTSmallerVector &exitBlks) {
+  const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+
+  RegiT endBranchReg = static_cast<int>
+    (funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass));
+  assert(endBranchReg >= 0);
+
+  // reg = 0 before entering the loop
+  addLoopEndbranchInitReg(loopRep, endBranchReg);
+
+  uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
+  assert(numBlks >= 2 && numBlks == exitBlks.size());
+
+  BlockT *preExitingBlk = exitingBlks[0];
+  BlockT *preExitBlk = exitBlks[0];
+  BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
+  funcRep->push_back(preBranchBlk);  //insert to function
+  SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
+
+  BlockT *newLandBlk = preBranchBlk;
+
+  CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
+                                        newLandBlk);
+  preExitingBlk->removeSuccessor(preExitBlk);
+  preExitingBlk->addSuccessor(newLandBlk);
+
+  //it is redundant to add reg = 0 to exitingBlks[0]
+
+  // For the 1..n-th exiting paths (the last iteration handles two paths),
+  // create the branch to the previous path and the current path.
+  for (uint32_t i = 1; i < numBlks; ++i) {
+    BlockT *curExitingBlk = exitingBlks[i];
+    BlockT *curExitBlk = exitBlks[i];
+    BlockT *curBranchBlk;
+
+    if (i == numBlks - 1) {
+      curBranchBlk = curExitBlk;
+    } else {
+      curBranchBlk = funcRep->CreateMachineBasicBlock();
+      funcRep->push_back(curBranchBlk);  //insert to function
+      SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
+    }
+
+    // Add reg = i to exitingBlks[i].
+    CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
+                                       endBranchReg, i);
+
+    // Remove the edge (exitingBlks[i], exitBlks[i]) and add the new edge
+    // (exitingBlks[i], newLandBlk).
+    CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
+                                          newLandBlk);
+    curExitingBlk->removeSuccessor(curExitBlk);
+    curExitingBlk->addSuccessor(newLandBlk);
+
+    // add to preBranchBlk the branch instruction:
+    // if (endBranchReg == preVal)
+    //    preExitBlk
+    // else
+    //    curBranchBlk
+    //
+    // preValReg = i - 1
+
+    DebugLoc DL;
+    RegiT preValReg = static_cast<int>
+      (funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass));
+    BuildMI(preBranchBlk, DL, tii->get(AMDIL::LOADCONST_i32), preValReg)
+      .addImm(i - 1);  //preVal
+
+    // condResReg = (endBranchReg == preValReg)
+    RegiT condResReg = static_cast<int>
+      (funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass));
+    BuildMI(preBranchBlk, DL, tii->get(AMDIL::IEQ), condResReg)
+      .addReg(endBranchReg).addReg(preValReg);
+
+    BuildMI(preBranchBlk, DL, tii->get(AMDIL::BRANCH_COND_i32))
+      .addMBB(preExitBlk).addReg(condResReg);
+
+    preBranchBlk->addSuccessor(preExitBlk);
+    preBranchBlk->addSuccessor(curBranchBlk);
+
+    // Update preExitingBlk, preExitBlk, preBranchBlk.
+    preExitingBlk = curExitingBlk;
+    preExitBlk = curExitBlk;
+    preBranchBlk = curBranchBlk;
+
+  } //end for 1 .. 
n blocks + + return newLandBlk; +} //addLoopEndbranchBlock + +template +typename CFGStructurizer::PathToKind +CFGStructurizer::singlePathTo(BlockT *srcBlk, BlockT *dstBlk, + bool allowSideEntry) { + assert(dstBlk); + + if (srcBlk == dstBlk) { + return SinglePath_InPath; + } + + while (srcBlk && srcBlk->succ_size() == 1) { + srcBlk = *srcBlk->succ_begin(); + if (srcBlk == dstBlk) { + return SinglePath_InPath; + } + + if (!allowSideEntry && srcBlk->pred_size() > 1) { + return Not_SinglePath; + } + } + + if (srcBlk && srcBlk->succ_size()==0) { + return SinglePath_NotInPath; + } + + return Not_SinglePath; +} //singlePathTo + +// If there is a single path from srcBlk to dstBlk, return the last block before +// dstBlk If there is a single path from srcBlk->end without dstBlk, return the +// last block in the path Otherwise, return NULL +template +typename CFGStructurizer::BlockT * +CFGStructurizer::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk, + bool allowSideEntry) { + assert(dstBlk); + + if (srcBlk == dstBlk) { + return srcBlk; + } + + if (srcBlk->succ_size() == 0) { + return srcBlk; + } + + while (srcBlk && srcBlk->succ_size() == 1) { + BlockT *preBlk = srcBlk; + + srcBlk = *srcBlk->succ_begin(); + if (srcBlk == NULL) { + return preBlk; + } + + if (!allowSideEntry && srcBlk->pred_size() > 1) { + return NULL; + } + } + + if (srcBlk && srcBlk->succ_size()==0) { + return srcBlk; + } + + return NULL; + +} //singlePathEnd + +template +int CFGStructurizer::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk, + BlockT *dstBlk) { + int cloned = 0; + assert(preBlk->isSuccessor(srcBlk)); + while (srcBlk && srcBlk != dstBlk) { + assert(srcBlk->succ_size() == 1); + if (srcBlk->pred_size() > 1) { + srcBlk = cloneBlockForPredecessor(srcBlk, preBlk); + ++cloned; + } + + preBlk = srcBlk; + srcBlk = *srcBlk->succ_begin(); + } + + return cloned; +} //cloneOnSideEntryTo + +template +typename CFGStructurizer::BlockT * +CFGStructurizer::cloneBlockForPredecessor(BlockT *curBlk, + BlockT *predBlk) { + assert(predBlk->isSuccessor(curBlk) && + "succBlk is not a prececessor of curBlk"); + + BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions + CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk); + //srcBlk, oldBlk, newBlk + + predBlk->removeSuccessor(curBlk); + predBlk->addSuccessor(cloneBlk); + + // add all successor to cloneBlk + CFGTraits::cloneSuccessorList(cloneBlk, curBlk); + + numClonedInstr += curBlk->size(); + + if (DEBUGME) { + errs() << "Cloned block: " << "BB" + << curBlk->getNumber() << "size " << curBlk->size() << "\n"; + } + + SHOWNEWBLK(cloneBlk, "result of Cloned block: "); + + return cloneBlk; +} //cloneBlockForPredecessor + +template +typename CFGStructurizer::BlockT * +CFGStructurizer::exitingBlock2ExitBlock(LoopT *loopRep, + BlockT *exitingBlk) { + BlockT *exitBlk = NULL; + + for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(), + iterSuccEnd = exitingBlk->succ_end(); + iterSucc != iterSuccEnd; ++iterSucc) { + BlockT *curBlk = *iterSucc; + if (!loopRep->contains(curBlk)) { + assert(exitBlk == NULL); + exitBlk = curBlk; + } + } + + assert(exitBlk != NULL); + + return exitBlk; +} //exitingBlock2ExitBlock + +template +void CFGStructurizer::migrateInstruction(BlockT *srcBlk, + BlockT *dstBlk, + InstrIterator insertPos) { + InstrIterator spliceEnd; + //look for the input branchinstr, not the AMDIL branchinstr + InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk); + if (branchInstr == NULL) { + if (DEBUGME) { + errs() << "migrateInstruction 
don't see branch instr\n";
+    }
+    spliceEnd = srcBlk->end();
+  } else {
+    if (DEBUGME) {
+      errs() << "migrateInstruction see branch instr\n";
+      branchInstr->dump();
+    }
+    spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
+  }
+  if (DEBUGME) {
+    errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
+           << " srcSize = " << srcBlk->size() << "\n";
+  }
+
+  //splice inserts before insertPos
+  dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
+
+  if (DEBUGME) {
+    errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
+           << " srcSize = " << srcBlk->size() << "\n";
+  }
+} //migrateInstruction
+
+// normalizeInfiniteLoopExit changes
+//   B1:
+//     uncond_br LoopHeader
+//
+// to
+//   B1:
+//     cond_br 1 LoopHeader dummyExit
+// and returns the newly added dummy exit block.
+//
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
+  BlockT *loopHeader;
+  BlockT *loopLatch;
+  loopHeader = LoopRep->getHeader();
+  loopLatch = LoopRep->getLoopLatch();
+  BlockT *dummyExitBlk = NULL;
+  if (loopHeader != NULL && loopLatch != NULL) {
+    InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
+    if (branchInstr != NULL && CFGTraits::isUncondBranch(branchInstr)) {
+      dummyExitBlk = funcRep->CreateMachineBasicBlock();
+      funcRep->push_back(dummyExitBlk);  //insert to function
+      SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
+
+      if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
+
+      typename BlockT::iterator insertPos =
+        CFGTraits::getInstrPos(loopLatch, branchInstr);
+      unsigned immReg =
+        funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass);
+      CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
+      InstrT *newInstr =
+        CFGTraits::insertInstrBefore(insertPos, AMDIL::BRANCH_COND_i32, passRep);
+      MachineInstrBuilder(newInstr).addMBB(loopHeader).addReg(immReg, false);
+
+      SHOWNEWINSTR(newInstr);
+
+      branchInstr->eraseFromParent();
+      loopLatch->addSuccessor(dummyExitBlk);
+    }
+  }
+
+  return dummyExitBlk;
+} //normalizeInfiniteLoopExit
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
+  InstrT *branchInstr;
+
+  // I saw two unconditional branches in one basic block in the example
+  // test_fc_do_while_or.c; the upstream needs a fix so this loop can be removed.
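+  // For illustration, the problematic shape (hypothetical block numbers):
+  //   BB5:
+  //     ...
+  //     BRANCH <BB7>
+  //     BRANCH <BB7>
+  // hence the loop below erases trailing unconditional branches until none
+  // remains.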
+ while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk)) + && CFGTraits::isUncondBranch(branchInstr)) { + if (DEBUGME) { + errs() << "Removing unconditional branch instruction" ; + branchInstr->dump(); + } + branchInstr->eraseFromParent(); + } +} //removeUnconditionalBranch + +template +void CFGStructurizer::removeRedundantConditionalBranch(BlockT *srcBlk) { + if (srcBlk->succ_size() == 2) { + BlockT *blk1 = *srcBlk->succ_begin(); + BlockT *blk2 = *(++srcBlk->succ_begin()); + + if (blk1 == blk2) { + InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk); + assert(branchInstr && CFGTraits::isCondBranch(branchInstr)); + if (DEBUGME) { + errs() << "Removing unneeded conditional branch instruction" ; + branchInstr->dump(); + } + branchInstr->eraseFromParent(); + SHOWNEWBLK(blk1, "Removing redundant successor"); + srcBlk->removeSuccessor(blk1); + } + } +} //removeRedundantConditionalBranch + +template +void CFGStructurizer::addDummyExitBlock(SmallVector &retBlks) { + BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock(); + funcRep->push_back(dummyExitBlk); //insert to function + CFGTraits::insertInstrEnd(dummyExitBlk, AMDIL::RETURN, passRep); + + for (typename SmallVector::iterator iter = + retBlks.begin(), + iterEnd = retBlks.end(); iter != iterEnd; ++iter) { + BlockT *curBlk = *iter; + InstrT *curInstr = CFGTraits::getReturnInstr(curBlk); + if (curInstr) { + curInstr->eraseFromParent(); + } +#if 0 + if (curBlk->size()==0 && curBlk->pred_size() == 1) { + if (DEBUGME) { + errs() << "Replace empty block BB" << curBlk->getNumber() + << " with dummyExitBlock\n"; + } + BlockT *predb = *curBlk->pred_begin(); + predb->removeSuccessor(curBlk); + curBlk = predb; + } //handle empty curBlk +#endif + curBlk->addSuccessor(dummyExitBlk); + if (DEBUGME) { + errs() << "Add dummyExitBlock to BB" << curBlk->getNumber() + << " successors\n"; + } + } //for + + SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: "); +} //addDummyExitBlock + +template +void CFGStructurizer::removeSuccessor(BlockT *srcBlk) { + while (srcBlk->succ_size()) { + srcBlk->removeSuccessor(*srcBlk->succ_begin()); + } +} + +template +void CFGStructurizer::recordSccnum(BlockT *srcBlk, int sccNum) { + BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk]; + + if (srcBlkInfo == NULL) { + srcBlkInfo = new BlockInfo(); + } + + srcBlkInfo->sccNum = sccNum; +} + +template +int CFGStructurizer::getSCCNum(BlockT *srcBlk) { + BlockInfo *srcBlkInfo = blockInfoMap[srcBlk]; + return srcBlkInfo ? 
srcBlkInfo->sccNum : INVALIDSCCNUM; +} + +template +void CFGStructurizer::retireBlock(BlockT *dstBlk, BlockT *srcBlk) { + if (DEBUGME) { + errs() << "Retiring BB" << srcBlk->getNumber() << "\n"; + } + + BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk]; + + if (srcBlkInfo == NULL) { + srcBlkInfo = new BlockInfo(); + } + + srcBlkInfo->isRetired = true; + //int i = srcBlk->succ_size(); + //int j = srcBlk->pred_size(); + assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0 + && "can't retire block yet"); +} + +template +bool CFGStructurizer::isRetiredBlock(BlockT *srcBlk) { + BlockInfo *srcBlkInfo = blockInfoMap[srcBlk]; + return (srcBlkInfo && srcBlkInfo->isRetired); +} + +template +bool CFGStructurizer::isActiveLoophead(BlockT *curBlk) { + LoopT *loopRep = loopInfo->getLoopFor(curBlk); + while (loopRep && loopRep->getHeader() == curBlk) { + LoopLandInfo *loopLand = getLoopLandInfo(loopRep); + + if(loopLand == NULL) + return true; + + BlockT *landBlk = loopLand->landBlk; + assert(landBlk); + if (!isRetiredBlock(landBlk)) { + return true; + } + + loopRep = loopRep->getParentLoop(); + } + + return false; +} //isActiveLoophead + +template +bool CFGStructurizer::needMigrateBlock(BlockT *blk) { + const unsigned blockSizeThreshold = 30; + const unsigned cloneInstrThreshold = 100; + + bool multiplePreds = blk && (blk->pred_size() > 1); + + if(!multiplePreds) + return false; + + unsigned blkSize = blk->size(); + return ((blkSize > blockSizeThreshold) + && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold)); +} //needMigrateBlock + +template +typename CFGStructurizer::BlockT * +CFGStructurizer::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk, + BlockTSmallerVector &exitBlks, + std::set &exitBlkSet) { + SmallVector inpathBlks; //in exit path blocks + + for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(), + predIterEnd = landBlk->pred_end(); + predIter != predIterEnd; ++predIter) { + BlockT *curBlk = *predIter; + if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) { + inpathBlks.push_back(curBlk); + } + } //for + + //if landBlk has predecessors that are not in the given loop, + //create a new block + BlockT *newLandBlk = landBlk; + if (inpathBlks.size() != landBlk->pred_size()) { + newLandBlk = funcRep->CreateMachineBasicBlock(); + funcRep->push_back(newLandBlk); //insert to function + newLandBlk->addSuccessor(landBlk); + for (typename SmallVector::iterator iter = + inpathBlks.begin(), + iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) { + BlockT *curBlk = *iter; + CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk); + //srcBlk, oldBlk, newBlk + curBlk->removeSuccessor(landBlk); + curBlk->addSuccessor(newLandBlk); + } + for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) { + if (exitBlks[i] == landBlk) { + exitBlks[i] = newLandBlk; + } + } + SHOWNEWBLK(newLandBlk, "NewLandingBlock: "); + } + + setLoopLandBlock(loopRep, newLandBlk); + + return newLandBlk; +} // recordLoopbreakLand + +template +void CFGStructurizer::setLoopLandBlock(LoopT *loopRep, BlockT *blk) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + + if (theEntry == NULL) { + theEntry = new LoopLandInfo(); + } + assert(theEntry->landBlk == NULL); + + if (blk == NULL) { + blk = funcRep->CreateMachineBasicBlock(); + funcRep->push_back(blk); //insert to function + SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: "); + } + + theEntry->landBlk = blk; + + if (DEBUGME) { + errs() << "setLoopLandBlock loop-header = BB" + << loopRep->getHeader()->getNumber() + << " 
landing-block = BB" << blk->getNumber() << "\n"; + } +} // setLoopLandBlock + +template +void CFGStructurizer::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + + if (theEntry == NULL) { + theEntry = new LoopLandInfo(); + } + + theEntry->breakOnRegs.insert(regNum); + + if (DEBUGME) { + errs() << "addLoopBreakOnReg loop-header = BB" + << loopRep->getHeader()->getNumber() + << " regNum = " << regNum << "\n"; + } +} // addLoopBreakOnReg + +template +void CFGStructurizer::addLoopContOnReg(LoopT *loopRep, RegiT regNum) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + + if (theEntry == NULL) { + theEntry = new LoopLandInfo(); + } + theEntry->contOnRegs.insert(regNum); + + if (DEBUGME) { + errs() << "addLoopContOnReg loop-header = BB" + << loopRep->getHeader()->getNumber() + << " regNum = " << regNum << "\n"; + } +} // addLoopContOnReg + +template +void CFGStructurizer::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + + if (theEntry == NULL) { + theEntry = new LoopLandInfo(); + } + theEntry->breakInitRegs.insert(regNum); + + if (DEBUGME) { + errs() << "addLoopBreakInitReg loop-header = BB" + << loopRep->getHeader()->getNumber() + << " regNum = " << regNum << "\n"; + } +} // addLoopBreakInitReg + +template +void CFGStructurizer::addLoopContInitReg(LoopT *loopRep, RegiT regNum) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + + if (theEntry == NULL) { + theEntry = new LoopLandInfo(); + } + theEntry->contInitRegs.insert(regNum); + + if (DEBUGME) { + errs() << "addLoopContInitReg loop-header = BB" + << loopRep->getHeader()->getNumber() + << " regNum = " << regNum << "\n"; + } +} // addLoopContInitReg + +template +void CFGStructurizer::addLoopEndbranchInitReg(LoopT *loopRep, + RegiT regNum) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + + if (theEntry == NULL) { + theEntry = new LoopLandInfo(); + } + theEntry->endbranchInitRegs.insert(regNum); + + if (DEBUGME) + { + errs() << "addLoopEndbranchInitReg loop-header = BB" + << loopRep->getHeader()->getNumber() + << " regNum = " << regNum << "\n"; + } +} // addLoopEndbranchInitReg + +template +typename CFGStructurizer::LoopLandInfo * +CFGStructurizer::getLoopLandInfo(LoopT *loopRep) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + + return theEntry; +} // getLoopLandInfo + +template +typename CFGStructurizer::BlockT * +CFGStructurizer::getLoopLandBlock(LoopT *loopRep) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + + return theEntry ? theEntry->landBlk : NULL; +} // getLoopLandBlock + + +template +bool CFGStructurizer::hasBackEdge(BlockT *curBlk) { + LoopT *loopRep = loopInfo->getLoopFor(curBlk); + if (loopRep == NULL) + return false; + + BlockT *loopHeader = loopRep->getHeader(); + + return curBlk->isSuccessor(loopHeader); + +} //hasBackEdge + +template +unsigned CFGStructurizer::getLoopDepth(LoopT *loopRep) { + return loopRep ? loopRep->getLoopDepth() : 0; +} //getLoopDepth + +template +int CFGStructurizer::countActiveBlock +(typename SmallVector::const_iterator iterStart, + typename SmallVector::const_iterator iterEnd) { + int count = 0; + while (iterStart != iterEnd) { + if (!isRetiredBlock(*iterStart)) { + ++count; + } + ++iterStart; + } + + return count; +} //countActiveBlock + +// This is work around solution for findNearestCommonDominator not avaiable to +// post dom a proper fix should go to Dominators.h. 
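+// A minimal sketch of the common case below: if neither block post-dominates
+// the other, climb blk1's immediate-post-dominator chain until a node that
+// post-dominates blk2 is found, e.g.
+//
+//   node1 = node1->getIDom();
+//   while (node1 && !postDomTree->dominates(node1, node2))
+//     node1 = node1->getIDom();
+//
+// Newly cloned blocks may have no tree node yet, so they are first resolved
+// through their single successor.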
+ +template +typename CFGStructurizer::BlockT* +CFGStructurizer::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) { + + if (postDomTree->dominates(blk1, blk2)) { + return blk1; + } + if (postDomTree->dominates(blk2, blk1)) { + return blk2; + } + + DomTreeNodeT *node1 = postDomTree->getNode(blk1); + DomTreeNodeT *node2 = postDomTree->getNode(blk2); + + // Handle newly cloned node. + if (node1 == NULL && blk1->succ_size() == 1) { + return findNearestCommonPostDom(*blk1->succ_begin(), blk2); + } + if (node2 == NULL && blk2->succ_size() == 1) { + return findNearestCommonPostDom(blk1, *blk2->succ_begin()); + } + + if (node1 == NULL || node2 == NULL) { + return NULL; + } + + node1 = node1->getIDom(); + while (node1) { + if (postDomTree->dominates(node1, node2)) { + return node1->getBlock(); + } + node1 = node1->getIDom(); + } + + return NULL; +} + +template +typename CFGStructurizer::BlockT * +CFGStructurizer::findNearestCommonPostDom +(typename std::set &blks) { + BlockT *commonDom; + typename std::set::const_iterator iter = blks.begin(); + typename std::set::const_iterator iterEnd = blks.end(); + for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) { + BlockT *curBlk = *iter; + if (curBlk != commonDom) { + commonDom = findNearestCommonPostDom(curBlk, commonDom); + } + } + + if (DEBUGME) { + errs() << "Common post dominator for exit blocks is "; + if (commonDom) { + errs() << "BB" << commonDom->getNumber() << "\n"; + } else { + errs() << "NULL\n"; + } + } + + return commonDom; +} //findNearestCommonPostDom + +} //end namespace llvm + +//todo: move-end + + +//===----------------------------------------------------------------------===// +// +// CFGStructurizer for AMDIL +// +//===----------------------------------------------------------------------===// + + +using namespace llvmCFGStruct; + +namespace llvm +{ +class AMDILCFGStructurizer : public MachineFunctionPass +{ +public: + typedef MachineInstr InstructionType; + typedef MachineFunction FunctionType; + typedef MachineBasicBlock BlockType; + typedef MachineLoopInfo LoopinfoType; + typedef MachineDominatorTree DominatortreeType; + typedef MachinePostDominatorTree PostDominatortreeType; + typedef MachineDomTreeNode DomTreeNodeType; + typedef MachineLoop LoopType; +//private: + TargetMachine &TM; + const TargetInstrInfo *TII; + +//public: +// static char ID; + +public: + AMDILCFGStructurizer(char &pid, TargetMachine &tm AMDIL_OPT_LEVEL_DECL); + const TargetInstrInfo *getTargetInstrInfo() const; + //bool runOnMachineFunction(MachineFunction &F); + +private: + +}; //end of class AMDILCFGStructurizer + +//char AMDILCFGStructurizer::ID = 0; +} //end of namespace llvm +AMDILCFGStructurizer::AMDILCFGStructurizer(char &pid, TargetMachine &tm + AMDIL_OPT_LEVEL_DECL) +: MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()) { +} + +const TargetInstrInfo *AMDILCFGStructurizer::getTargetInstrInfo() const { + return TII; +} +//===----------------------------------------------------------------------===// +// +// CFGPrepare +// +//===----------------------------------------------------------------------===// + + +using namespace llvmCFGStruct; + +namespace llvm +{ +class AMDILCFGPrepare : public AMDILCFGStructurizer +{ +public: + static char ID; + +public: + AMDILCFGPrepare(TargetMachine &tm AMDIL_OPT_LEVEL_DECL); + + virtual const char *getPassName() const; + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + bool runOnMachineFunction(MachineFunction &F); + +private: + +}; //end of class AMDILCFGPrepare + +char 
AMDILCFGPrepare::ID = 0; +} //end of namespace llvm + +AMDILCFGPrepare::AMDILCFGPrepare(TargetMachine &tm AMDIL_OPT_LEVEL_DECL) + : AMDILCFGStructurizer(ID, tm AMDIL_OPT_LEVEL_VAR) +{ +} +const char *AMDILCFGPrepare::getPassName() const { + return "AMD IL Control Flow Graph Preparation Pass"; +} + +void AMDILCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); +} + +//===----------------------------------------------------------------------===// +// +// CFGPerform +// +//===----------------------------------------------------------------------===// + + +using namespace llvmCFGStruct; + +namespace llvm +{ +class AMDILCFGPerform : public AMDILCFGStructurizer +{ +public: + static char ID; + +public: + AMDILCFGPerform(TargetMachine &tm AMDIL_OPT_LEVEL_DECL); + virtual const char *getPassName() const; + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + bool runOnMachineFunction(MachineFunction &F); + +private: + +}; //end of class AMDILCFGPerform + +char AMDILCFGPerform::ID = 0; +} //end of namespace llvm + + AMDILCFGPerform::AMDILCFGPerform(TargetMachine &tm AMDIL_OPT_LEVEL_DECL) +: AMDILCFGStructurizer(ID, tm AMDIL_OPT_LEVEL_VAR) +{ +} + +const char *AMDILCFGPerform::getPassName() const { + return "AMD IL Control Flow Graph structurizer Pass"; +} + +void AMDILCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); +} + +//===----------------------------------------------------------------------===// +// +// CFGStructTraits +// +//===----------------------------------------------------------------------===// + +namespace llvmCFGStruct +{ +// this class is tailor to the AMDIL backend +template<> +struct CFGStructTraits +{ + typedef int RegiT; + + static int getBreakNzeroOpcode(int oldOpcode) { + switch(oldOpcode) { + ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::BREAK_LOGICALNZ); + default: + assert(0 && "internal error"); + }; + return -1; + } + + static int getBreakZeroOpcode(int oldOpcode) { + switch(oldOpcode) { + ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::BREAK_LOGICALZ); + default: + assert(0 && "internal error"); + }; + return -1; + } + + static int getBranchNzeroOpcode(int oldOpcode) { + switch(oldOpcode) { + ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::IF_LOGICALNZ); + default: + assert(0 && "internal error"); + }; + return -1; + } + + static int getBranchZeroOpcode(int oldOpcode) { + switch(oldOpcode) { + ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::IF_LOGICALZ); + default: + assert(0 && "internal error"); + }; + return -1; + } + + static int getContinueNzeroOpcode(int oldOpcode) + { + switch(oldOpcode) { + ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::CONTINUE_LOGICALNZ); + default: + assert(0 && "internal error"); + }; + return -1; + } + + static int getContinueZeroOpcode(int oldOpcode) { + switch(oldOpcode) { + ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::CONTINUE_LOGICALZ); + default: + assert(0 && "internal error"); + }; + return -1; + } + +// the explicitly represented branch target is the true branch target +#define getExplicitBranch getTrueBranch +#define setExplicitBranch setTrueBranch + + static MachineBasicBlock *getTrueBranch(MachineInstr *instr) { + return instr->getOperand(0).getMBB(); + } + + static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) { + instr->getOperand(0).setMBB(blk); + 
}
+
+  static MachineBasicBlock *
+  getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
+    assert(blk->succ_size() == 2);
+    MachineBasicBlock *trueBranch = getTrueBranch(instr);
+    MachineBasicBlock::succ_iterator iter = blk->succ_begin();
+    MachineBasicBlock::succ_iterator iterNext = iter;
+    ++iterNext;
+
+    return (*iter == trueBranch) ? *iterNext : *iter;
+  }
+
+  static bool isCondBranch(MachineInstr *instr) {
+    switch (instr->getOpcode()) {
+      ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND);
+      break;
+    default:
+      return false;
+    }
+    return true;
+  }
+
+  static bool isUncondBranch(MachineInstr *instr) {
+    switch (instr->getOpcode()) {
+    case AMDIL::BRANCH:
+      break;
+    default:
+      return false;
+    }
+    return true;
+  }
+
+  static bool isPhimove(MachineInstr *instr) {
+    switch (instr->getOpcode()) {
+      ExpandCaseToAllTypes(AMDIL::MOVE);
+      break;
+    default:
+      return false;
+    }
+    return true;
+  }
+
+  static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
+    //get DebugLoc from the last MachineBasicBlock instruction with debug info
+    DebugLoc DL;
+    for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
+      MachineInstr *instr = &(*iter);
+      if (instr->getDebugLoc().isUnknown() == false) {
+        DL = instr->getDebugLoc();
+      }
+    }
+    return DL;
+  }
+
+  static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
+    MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+    MachineInstr *instr = &*iter;
+    if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
+      return instr;
+    }
+    return NULL;
+  }
+
+  // A more accurate name for this would be getPossibleLoopendBlockBranchInstr.
+  //
+  // A BB with a backward edge could have move instructions after the branch
+  // instruction; such move instructions "belong to" the loop backward edge. 
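+  // For example (hypothetical block layout):
+  //   ...
+  //   %v0 = MOVE %v1            ; phi-move
+  //   BRANCH_COND_i32 <header>, %cond
+  //   %v2 = MOVE %v3            ; phi-move placed after the branch
+  // so the reverse scan below skips phi-moves instead of only checking the
+  // last instruction.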
+ // + static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) { + for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(), + iterEnd = blk->rend(); iter != iterEnd; ++iter) { + // FIXME: Simplify + MachineInstr *instr = &*iter; + if (instr) { + if (isCondBranch(instr) || isUncondBranch(instr)) { + return instr; + } else if (!isPhimove(instr)) { + break; + } + } + } + return NULL; + } + + static MachineInstr *getReturnInstr(MachineBasicBlock *blk) { + MachineBasicBlock::reverse_iterator iter = blk->rbegin(); + if (iter != blk->rend()) { + MachineInstr *instr = &(*iter); + if (instr->getOpcode() == AMDIL::RETURN) { + return instr; + } + } + return NULL; + } + + static MachineInstr *getContinueInstr(MachineBasicBlock *blk) { + MachineBasicBlock::reverse_iterator iter = blk->rbegin(); + if (iter != blk->rend()) { + MachineInstr *instr = &(*iter); + if (instr->getOpcode() == AMDIL::CONTINUE) { + return instr; + } + } + return NULL; + } + + static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) { + for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) { + MachineInstr *instr = &(*iter); + if ((instr->getOpcode() == AMDIL::BREAK_LOGICALNZ_i32) || (instr->getOpcode() == AMDIL::BREAK_LOGICALZ_i32)) { + return instr; + } + } + return NULL; + } + + static bool isReturnBlock(MachineBasicBlock *blk) { + MachineInstr *instr = getReturnInstr(blk); + bool isReturn = (blk->succ_size() == 0); + if (instr) { + assert(isReturn); + } else if (isReturn) { + if (DEBUGME) { + errs() << "BB" << blk->getNumber() + <<" is return block without RETURN instr\n"; + } + } + + return isReturn; + } + + static MachineBasicBlock::iterator + getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) { + assert(instr->getParent() == blk && "instruction doesn't belong to block"); + MachineBasicBlock::iterator iter = blk->begin(); + MachineBasicBlock::iterator iterEnd = blk->end(); + while (&(*iter) != instr && iter != iterEnd) { + ++iter; + } + + assert(iter != iterEnd); + return iter; + }//getInstrPos + + static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode, + AMDILCFGStructurizer *passRep) { + return insertInstrBefore(blk,newOpcode,passRep,DebugLoc()); + } //insertInstrBefore + + static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode, + AMDILCFGStructurizer *passRep, DebugLoc DL) { + const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); + MachineInstr *newInstr = + blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL); + + MachineBasicBlock::iterator res; + if (blk->begin() != blk->end()) { + blk->insert(blk->begin(), newInstr); + } else { + blk->push_back(newInstr); + } + + SHOWNEWINSTR(newInstr); + + return newInstr; + } //insertInstrBefore + + static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode, + AMDILCFGStructurizer *passRep) { + insertInstrEnd(blk,newOpcode,passRep,DebugLoc()); + } //insertInstrEnd + + static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode, + AMDILCFGStructurizer *passRep, DebugLoc DL) { + const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); + MachineInstr *newInstr = blk->getParent() + ->CreateMachineInstr(tii->get(newOpcode), DL); + + blk->push_back(newInstr); + //assume the instruction doesn't take any reg operand ... 
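+    // A typical call site in this file is opcode-only, e.g.
+    //   insertInstrEnd(dummyExitBlk, AMDIL::RETURN, passRep);
+    // variants that need operands (e.g. insertCondBranchEnd) attach them
+    // themselves through MachineInstrBuilder.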
+ + SHOWNEWINSTR(newInstr); + } //insertInstrEnd + + static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos, + int newOpcode, + AMDILCFGStructurizer *passRep) { + MachineInstr *oldInstr = &(*instrPos); + const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); + MachineBasicBlock *blk = oldInstr->getParent(); + MachineInstr *newInstr = + blk->getParent()->CreateMachineInstr(tii->get(newOpcode), + DebugLoc()); + + blk->insert(instrPos, newInstr); + //assume the instruction doesn't take any reg operand ... + + SHOWNEWINSTR(newInstr); + return newInstr; + } //insertInstrBefore + + static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos, + int newOpcode, + AMDILCFGStructurizer *passRep, + DebugLoc DL) { + MachineInstr *oldInstr = &(*instrPos); + const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); + MachineBasicBlock *blk = oldInstr->getParent(); + MachineInstr *newInstr = + blk->getParent()->CreateMachineInstr(tii->get(newOpcode), + DL); + + blk->insert(instrPos, newInstr); + MachineInstrBuilder(newInstr).addReg(oldInstr->getOperand(1).getReg(), + false); + + SHOWNEWINSTR(newInstr); + //erase later oldInstr->eraseFromParent(); + } //insertCondBranchBefore + + static void insertCondBranchBefore(MachineBasicBlock *blk, + MachineBasicBlock::iterator insertPos, + int newOpcode, + AMDILCFGStructurizer *passRep, + RegiT regNum, + DebugLoc DL) { + const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); + + MachineInstr *newInstr = + blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL); + + //insert before + blk->insert(insertPos, newInstr); + MachineInstrBuilder(newInstr).addReg(regNum, false); + + SHOWNEWINSTR(newInstr); + } //insertCondBranchBefore + + static void insertCondBranchEnd(MachineBasicBlock *blk, + int newOpcode, + AMDILCFGStructurizer *passRep, + RegiT regNum) { + const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); + MachineInstr *newInstr = + blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DebugLoc()); + + blk->push_back(newInstr); + MachineInstrBuilder(newInstr).addReg(regNum, false); + + SHOWNEWINSTR(newInstr); + } //insertCondBranchEnd + + + static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos, + AMDILCFGStructurizer *passRep, + RegiT regNum, int regVal) { + MachineInstr *oldInstr = &(*instrPos); + const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); + MachineBasicBlock *blk = oldInstr->getParent(); + MachineInstr *newInstr = + blk->getParent()->CreateMachineInstr(tii->get(AMDIL::LOADCONST_i32), + DebugLoc()); + MachineInstrBuilder(newInstr).addReg(regNum, RegState::Define); //set target + MachineInstrBuilder(newInstr).addImm(regVal); //set src value + + blk->insert(instrPos, newInstr); + + SHOWNEWINSTR(newInstr); + } //insertAssignInstrBefore + + static void insertAssignInstrBefore(MachineBasicBlock *blk, + AMDILCFGStructurizer *passRep, + RegiT regNum, int regVal) { + const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); + + MachineInstr *newInstr = + blk->getParent()->CreateMachineInstr(tii->get(AMDIL::LOADCONST_i32), + DebugLoc()); + MachineInstrBuilder(newInstr).addReg(regNum, RegState::Define); //set target + MachineInstrBuilder(newInstr).addImm(regVal); //set src value + + if (blk->begin() != blk->end()) { + blk->insert(blk->begin(), newInstr); + } else { + blk->push_back(newInstr); + } + + SHOWNEWINSTR(newInstr); + + } //insertInstrBefore + + static void insertCompareInstrBefore(MachineBasicBlock *blk, + MachineBasicBlock::iterator instrPos, + AMDILCFGStructurizer 
*passRep, + RegiT dstReg, RegiT src1Reg, + RegiT src2Reg) { + const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); + MachineInstr *newInstr = + blk->getParent()->CreateMachineInstr(tii->get(AMDIL::IEQ), DebugLoc()); + + MachineInstrBuilder(newInstr).addReg(dstReg, RegState::Define); //set target + MachineInstrBuilder(newInstr).addReg(src1Reg); //set src value + MachineInstrBuilder(newInstr).addReg(src2Reg); //set src value + + blk->insert(instrPos, newInstr); + SHOWNEWINSTR(newInstr); + + } //insertCompareInstrBefore + + static void cloneSuccessorList(MachineBasicBlock *dstBlk, + MachineBasicBlock *srcBlk) { + for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(), + iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) { + dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of + } + } //cloneSuccessorList + + static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) { + MachineFunction *func = srcBlk->getParent(); + MachineBasicBlock *newBlk = func->CreateMachineBasicBlock(); + func->push_back(newBlk); //insert to function + //newBlk->setNumber(srcBlk->getNumber()); + for (MachineBasicBlock::iterator iter = srcBlk->begin(), + iterEnd = srcBlk->end(); + iter != iterEnd; ++iter) { + MachineInstr *instr = func->CloneMachineInstr(iter); + // This is a workaround for LLVM bugzilla 8420 because CloneMachineInstr + // does not clone the AsmPrinterFlags. + instr->setAsmPrinterFlag( + (llvm::MachineInstr::CommentFlag)iter->getAsmPrinterFlags()); + newBlk->push_back(instr); + } + return newBlk; + } + + //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because + //the AMDIL instruction is not recognized as terminator fix this and retire + //this routine + static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk, + MachineBasicBlock *oldBlk, + MachineBasicBlock *newBlk) { + MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk); + if (branchInstr && isCondBranch(branchInstr) && + getExplicitBranch(branchInstr) == oldBlk) { + setExplicitBranch(branchInstr, newBlk); + } + } + + static void wrapup(MachineBasicBlock *entryBlk) { + assert((!entryBlk->getParent()->getJumpTableInfo() + || entryBlk->getParent()->getJumpTableInfo()->isEmpty()) + && "found a jump table"); + + //collect continue right before endloop + SmallVector contInstr; + MachineBasicBlock::iterator pre = entryBlk->begin(); + MachineBasicBlock::iterator iterEnd = entryBlk->end(); + MachineBasicBlock::iterator iter = pre; + while (iter != iterEnd) { + if (pre->getOpcode() == AMDIL::CONTINUE + && iter->getOpcode() == AMDIL::ENDLOOP) { + contInstr.push_back(pre); + } + pre = iter; + ++iter; + } //end while + + //delete continue right before endloop + for (unsigned i = 0; i < contInstr.size(); ++i) { + contInstr[i]->eraseFromParent(); + } + + // TODO to fix up jump table so later phase won't be confused. if + // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but + // there isn't such an interface yet. 
alternatively, replace all the other
+  // blocks in the jump table with the entryBlk //}
+
+  } //wrapup
+
+  static MachineDominatorTree *getDominatorTree(AMDILCFGStructurizer &pass) {
+    return &pass.getAnalysis<MachineDominatorTree>();
+  }
+
+  static MachinePostDominatorTree*
+  getPostDominatorTree(AMDILCFGStructurizer &pass) {
+    return &pass.getAnalysis<MachinePostDominatorTree>();
+  }
+
+  static MachineLoopInfo *getLoopInfo(AMDILCFGStructurizer &pass) {
+    return &pass.getAnalysis<MachineLoopInfo>();
+  }
+}; // template class CFGStructTraits
+} //end of namespace llvm
+
+// createAMDILCFGPreparationPass - Returns a pass
+FunctionPass *llvm::createAMDILCFGPreparationPass(TargetMachine &tm
+                                                  AMDIL_OPT_LEVEL_DECL) {
+  return new AMDILCFGPrepare(tm AMDIL_OPT_LEVEL_VAR);
+}
+
+bool AMDILCFGPrepare::runOnMachineFunction(MachineFunction &func) {
+  return llvmCFGStruct::CFGStructurizer<AMDILCFGStructurizer>().prepare(func,
+                                                                        *this);
+}
+
+// createAMDILCFGStructurizerPass - Returns a pass
+FunctionPass *llvm::createAMDILCFGStructurizerPass(TargetMachine &tm
+                                                   AMDIL_OPT_LEVEL_DECL) {
+  return new AMDILCFGPerform(tm AMDIL_OPT_LEVEL_VAR);
+}
+
+bool AMDILCFGPerform::runOnMachineFunction(MachineFunction &func) {
+  return llvmCFGStruct::CFGStructurizer<AMDILCFGStructurizer>().run(func,
+                                                                    *this);
+}
+
+//end of file newline goes below
+
diff --git a/lib/Target/AMDIL/AMDILCodeEmitter.h b/lib/Target/AMDIL/AMDILCodeEmitter.h
new file mode 100644
index 00000000000..b0ea1455cf9
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILCodeEmitter.h
@@ -0,0 +1,46 @@
+//===-- AMDILCodeEmitter.h - TODO: Add brief description ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDILCODEEMITTER_H
+#define AMDILCODEEMITTER_H
+
+namespace llvm {
+
+  /* XXX: Temp HACK to work around tablegen name generation */
+  class AMDILCodeEmitter {
+  public:
+    uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
+    virtual uint64_t getMachineOpValue(const MachineInstr &MI,
+                                       const MachineOperand &MO) const { return 0; }
+    virtual unsigned GPR4AlignEncode(const MachineInstr &MI,
+                                     unsigned OpNo) const {
+      return 0;
+    }
+    virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
+                                     unsigned OpNo) const {
+      return 0;
+    }
+    virtual uint64_t VOPPostEncode(const MachineInstr &MI,
+                                   uint64_t Value) const {
+      return Value;
+    }
+    virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
+                                      unsigned OpNo) const {
+      return 0;
+    }
+  };
+
+} // End namespace llvm
+
+#endif // AMDILCODEEMITTER_H
diff --git a/lib/Target/AMDIL/AMDILMCCodeEmitter.cpp b/lib/Target/AMDIL/AMDILMCCodeEmitter.cpp
new file mode 100644
index 00000000000..9366f2e7bcb
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILMCCodeEmitter.cpp
@@ -0,0 +1,158 @@
+//===---- AMDILMCCodeEmitter.cpp - Convert AMDIL text to AMDIL binary ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details. 
+// +//==-----------------------------------------------------------------------===// +// +//===---------------------------------------------------------------------===// + +#define DEBUG_TYPE "amdil-emitter" +#include "AMDIL.h" +#include "AMDILInstrInfo.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +#if 0 +namespace { + class AMDILMCCodeEmitter : public MCCodeEmitter { + AMDILMCCodeEmitter(const AMDILMCCodeEmitter &);// DO NOT IMPLEMENT + void operator=(const AMDILMCCodeEmitter &); // DO NOT IMPLEMENT + const TargetMachine &TM; + const TargetInstrInfo &TII; + MCContext &Ctx; + bool Is64BitMode; + public: + AMDILMCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit); + ~AMDILMCCodeEmitter(); + unsigned getNumFixupKinds() const; + const MCFixupKindInfo& getFixupKindInfo(MCFixupKind Kind) const; + static unsigned GetAMDILRegNum(const MCOperand &MO); + void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const; + void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte, + raw_ostream &OS) const; + void EmitImmediate(const MCOperand &Disp, unsigned ImmSize, + MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &os, + SmallVectorImpl &Fixups, int ImmOffset = 0) const; + + void EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups) const; + + }; // class AMDILMCCodeEmitter +}; // anonymous namespace + +namespace llvm { + MCCodeEmitter *createAMDILMCCodeEmitter(const Target &, + TargetMachine &TM, MCContext &Ctx) + { + return new AMDILMCCodeEmitter(TM, Ctx, false); + } +} + +AMDILMCCodeEmitter::AMDILMCCodeEmitter(TargetMachine &tm, MCContext &ctx + , bool is64Bit) +: TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) +{ + Is64BitMode = is64Bit; +} + +AMDILMCCodeEmitter::~AMDILMCCodeEmitter() +{ +} + +unsigned +AMDILMCCodeEmitter::getNumFixupKinds() const +{ + return 0; +} + +const MCFixupKindInfo & +AMDILMCCodeEmitter::getFixupKindInfo(MCFixupKind Kind) const +{ +// const static MCFixupKindInfo Infos[] = {}; + if (Kind < FirstTargetFixupKind) { + return MCCodeEmitter::getFixupKindInfo(Kind); + } + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return MCCodeEmitter::getFixupKindInfo(Kind); + // return Infos[Kind - FirstTargetFixupKind]; + +} + +void +AMDILMCCodeEmitter::EmitByte(unsigned char C, unsigned &CurByte, + raw_ostream &OS) const +{ + OS << (char) C; + ++CurByte; +} +void +AMDILMCCodeEmitter::EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte, + raw_ostream &OS) const +{ + // Output the constant in little endian byte order + for (unsigned i = 0; i != Size; ++i) { + EmitByte(Val & 255, CurByte, OS); + Val >>= 8; + } +} +void +AMDILMCCodeEmitter::EmitImmediate(const MCOperand &DispOp, unsigned ImmSize, + MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS, + SmallVectorImpl &Fixups, int ImmOffset) const +{ + // If this is a simple integer displacement that doesn't require a relocation + // emit it now. + if (DispOp.isImm()) { + EmitConstant(DispOp.getImm() + ImmOffset, ImmSize, CurByte, OS); + } + + // If we have an immoffset, add it to the expression + const MCExpr *Expr = DispOp.getExpr(); + + if (ImmOffset) { + Expr = MCBinaryExpr::CreateAdd(Expr, + MCConstantExpr::Create(ImmOffset, Ctx), Ctx); + } + // Emit a symbolic constant as a fixup and 4 zeros. 
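+  // The fixup records the current byte offset and the expression so that a
+  // later relocation/layout stage can patch the real value in; the constant
+  // emitted below merely reserves ImmSize placeholder bytes for it.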
+ Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind)); + // TODO: Why the 4 zeros? + EmitConstant(0, ImmSize, CurByte, OS); +} + +void +AMDILMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups) const +{ +#if 0 + unsigned Opcode = MI.getOpcode(); + const TargetInstrDesc &Desc = TII.get(Opcode); + unsigned TSFlags = Desc.TSFlags; + + // Keep track of the current byte being emitted. + unsigned CurByte = 0; + + unsigned NumOps = Desc.getNumOperands(); + unsigned CurOp = 0; + + unsigned char BaseOpcode = 0; +#ifndef NDEBUG + // FIXME: Verify. + if (// !Desc.isVariadic() && + CurOp != NumOps) { + errs() << "Cannot encode all operands of: "; + MI.dump(); + errs() << '\n'; + abort(); + } +#endif +#endif +} +#endif diff --git a/lib/Target/AMDIL/AMDILMachinePeephole.cpp b/lib/Target/AMDIL/AMDILMachinePeephole.cpp new file mode 100644 index 00000000000..b8e536361f0 --- /dev/null +++ b/lib/Target/AMDIL/AMDILMachinePeephole.cpp @@ -0,0 +1,173 @@ +//===-- AMDILMachinePeephole.cpp - AMDIL Machine Peephole Pass -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// + + +#define DEBUG_TYPE "machine_peephole" +#if !defined(NDEBUG) +#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) +#else +#define DEBUGME (false) +#endif + +#include "AMDIL.h" +#include "AMDILSubtarget.h" +#include "AMDILUtilityFunctions.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; +namespace +{ + class AMDILMachinePeephole : public MachineFunctionPass + { + public: + static char ID; + AMDILMachinePeephole(TargetMachine &tm AMDIL_OPT_LEVEL_DECL); + //virtual ~AMDILMachinePeephole(); + virtual const char* + getPassName() const; + virtual bool + runOnMachineFunction(MachineFunction &MF); + private: + void insertFence(MachineBasicBlock::iterator &MIB); + TargetMachine &TM; + bool mDebug; + }; // AMDILMachinePeephole + char AMDILMachinePeephole::ID = 0; +} // anonymous namespace + +namespace llvm +{ + FunctionPass* + createAMDILMachinePeephole(TargetMachine &tm AMDIL_OPT_LEVEL_DECL) + { + return new AMDILMachinePeephole(tm AMDIL_OPT_LEVEL_VAR); + } +} // llvm namespace + +AMDILMachinePeephole::AMDILMachinePeephole(TargetMachine &tm AMDIL_OPT_LEVEL_DECL) + : MachineFunctionPass(ID), TM(tm) +{ + mDebug = DEBUGME; +} + +bool +AMDILMachinePeephole::runOnMachineFunction(MachineFunction &MF) +{ + bool Changed = false; + const AMDILSubtarget *STM = &TM.getSubtarget(); + for (MachineFunction::iterator MBB = MF.begin(), MBE = MF.end(); + MBB != MBE; ++MBB) { + MachineBasicBlock *mb = MBB; + for (MachineBasicBlock::iterator MIB = mb->begin(), MIE = mb->end(); + MIB != MIE; ++MIB) { + MachineInstr *mi = MIB; + const char * name; + name = TM.getInstrInfo()->getName(mi->getOpcode()); + switch (mi->getOpcode()) { + default: + if (isAtomicInst(TM.getInstrInfo(), mi)) { + // If we don't support the hardware accellerated address spaces, + // then the atomic needs to be transformed to the global atomic. 
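+          // For example, on a device that only emulates local memory, the
+          // rewrite below turns ATOM_L_ADD into ATOM_G_ADD after rebasing
+          // the address operand by T2 (presumably the emulated-LDS base).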
+ if (strstr(name, "_L_") + && STM->device()->usesSoftware(AMDILDeviceInfo::LocalMem)) { + BuildMI(*mb, MIB, mi->getDebugLoc(), + TM.getInstrInfo()->get(AMDIL::ADD_i32), AMDIL::R1011) + .addReg(mi->getOperand(1).getReg()) + .addReg(AMDIL::T2); + mi->getOperand(1).setReg(AMDIL::R1011); + mi->setDesc( + TM.getInstrInfo()->get( + (mi->getOpcode() - AMDIL::ATOM_L_ADD) + AMDIL::ATOM_G_ADD)); + } else if (strstr(name, "_R_") + && STM->device()->usesSoftware(AMDILDeviceInfo::RegionMem)) { + assert(!"Software region memory is not supported!"); + mi->setDesc( + TM.getInstrInfo()->get( + (mi->getOpcode() - AMDIL::ATOM_R_ADD) + AMDIL::ATOM_G_ADD)); + } + } else if ((isLoadInst(TM.getInstrInfo(), mi) || isStoreInst(TM.getInstrInfo(), mi)) && isVolatileInst(TM.getInstrInfo(), mi)) { + insertFence(MIB); + } + continue; + break; + case AMDIL::USHR_i16: + case AMDIL::USHR_v2i16: + case AMDIL::USHR_v4i16: + case AMDIL::USHRVEC_i16: + case AMDIL::USHRVEC_v2i16: + case AMDIL::USHRVEC_v4i16: + if (TM.getSubtarget() + .device()->usesSoftware(AMDILDeviceInfo::ShortOps)) { + unsigned lReg = MF.getRegInfo() + .createVirtualRegister(&AMDIL::GPRI32RegClass); + unsigned Reg = MF.getRegInfo() + .createVirtualRegister(&AMDIL::GPRV4I32RegClass); + BuildMI(*mb, MIB, mi->getDebugLoc(), + TM.getInstrInfo()->get(AMDIL::LOADCONST_i32), + lReg).addImm(0xFFFF); + BuildMI(*mb, MIB, mi->getDebugLoc(), + TM.getInstrInfo()->get(AMDIL::BINARY_AND_v4i32), + Reg) + .addReg(mi->getOperand(1).getReg()) + .addReg(lReg); + mi->getOperand(1).setReg(Reg); + } + break; + case AMDIL::USHR_i8: + case AMDIL::USHR_v2i8: + case AMDIL::USHR_v4i8: + case AMDIL::USHRVEC_i8: + case AMDIL::USHRVEC_v2i8: + case AMDIL::USHRVEC_v4i8: + if (TM.getSubtarget() + .device()->usesSoftware(AMDILDeviceInfo::ByteOps)) { + unsigned lReg = MF.getRegInfo() + .createVirtualRegister(&AMDIL::GPRI32RegClass); + unsigned Reg = MF.getRegInfo() + .createVirtualRegister(&AMDIL::GPRV4I32RegClass); + BuildMI(*mb, MIB, mi->getDebugLoc(), + TM.getInstrInfo()->get(AMDIL::LOADCONST_i32), + lReg).addImm(0xFF); + BuildMI(*mb, MIB, mi->getDebugLoc(), + TM.getInstrInfo()->get(AMDIL::BINARY_AND_v4i32), + Reg) + .addReg(mi->getOperand(1).getReg()) + .addReg(lReg); + mi->getOperand(1).setReg(Reg); + } + break; + } + } + } + return Changed; +} + +const char* +AMDILMachinePeephole::getPassName() const +{ + return "AMDIL Generic Machine Peephole Optimization Pass"; +} + +void +AMDILMachinePeephole::insertFence(MachineBasicBlock::iterator &MIB) +{ + MachineInstr *MI = MIB; + MachineInstr *fence = BuildMI(*(MI->getParent()->getParent()), + MI->getDebugLoc(), + TM.getInstrInfo()->get(AMDIL::FENCE)).addReg(1); + + MI->getParent()->insert(MIB, fence); + fence = BuildMI(*(MI->getParent()->getParent()), + MI->getDebugLoc(), + TM.getInstrInfo()->get(AMDIL::FENCE)).addReg(1); + MIB = MI->getParent()->insertAfter(MIB, fence); +} diff --git a/lib/Target/AMDIL/AMDILPeepholeOptimizer.cpp b/lib/Target/AMDIL/AMDILPeepholeOptimizer.cpp new file mode 100644 index 00000000000..5fe9f53c8c8 --- /dev/null +++ b/lib/Target/AMDIL/AMDILPeepholeOptimizer.cpp @@ -0,0 +1,1138 @@ +//===-- AMDILPeepholeOptimizer.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//==-----------------------------------------------------------------------===// + +#define DEBUG_TYPE "PeepholeOpt" +#ifdef DEBUG +#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) +#else +#define DEBUGME 0 +#endif + +#include "AMDILAlgorithms.tpp" +#include "AMDILDevices.h" +#include "AMDILUtilityFunctions.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" + +#include + +#if 0 +STATISTIC(PointerAssignments, "Number of dynamic pointer " + "assigments discovered"); +STATISTIC(PointerSubtract, "Number of pointer subtractions discovered"); +#endif + +using namespace llvm; +// The Peephole optimization pass is used to do simple last minute optimizations +// that are required for correct code or to remove redundant functions +namespace { +class LLVM_LIBRARY_VISIBILITY AMDILPeepholeOpt : public FunctionPass { +public: + TargetMachine &TM; + static char ID; + AMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL); + ~AMDILPeepholeOpt(); + const char *getPassName() const; + bool runOnFunction(Function &F); + bool doInitialization(Module &M); + bool doFinalization(Module &M); + void getAnalysisUsage(AnalysisUsage &AU) const; +protected: +private: + // Function to initiate all of the instruction level optimizations. + bool instLevelOptimizations(BasicBlock::iterator *inst); + // Quick check to see if we need to dump all of the pointers into the + // arena. If this is correct, then we set all pointers to exist in arena. This + // is a workaround for aliasing of pointers in a struct/union. + bool dumpAllIntoArena(Function &F); + // Because I don't want to invalidate any pointers while in the + // safeNestedForEachFunction. I push atomic conversions to a vector and handle + // it later. This function does the conversions if required. + void doAtomicConversionIfNeeded(Function &F); + // Because __amdil_is_constant cannot be properly evaluated if + // optimizations are disabled, the call's are placed in a vector + // and evaluated after the __amdil_image* functions are evaluated + // which should allow the __amdil_is_constant function to be + // evaluated correctly. + void doIsConstCallConversionIfNeeded(); + bool mChanged; + bool mDebug; + bool mConvertAtomics; + CodeGenOpt::Level optLevel; + // Run a series of tests to see if we can optimize a CALL instruction. + bool optimizeCallInst(BasicBlock::iterator *bbb); + // A peephole optimization to optimize bit extract sequences. + bool optimizeBitExtract(Instruction *inst); + // A peephole optimization to optimize bit insert sequences. + bool optimizeBitInsert(Instruction *inst); + bool setupBitInsert(Instruction *base, + Instruction *&src, + Constant *&mask, + Constant *&shift); + // Expand the bit field insert instruction on versions of OpenCL that + // don't support it. + bool expandBFI(CallInst *CI); + // Expand the bit field mask instruction on version of OpenCL that + // don't support it. + bool expandBFM(CallInst *CI); + // On 7XX and 8XX operations, we do not have 24 bit signed operations. So in + // this case we need to expand them. These functions check for 24bit functions + // and then expand. 
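+  // For example, __amdil_imul24(a, b) expands to a plain 32-bit Mul and
+  // __amdil_imad24(a, b, c) to a call of the 32-bit __amdil_imad
+  // intrinsic; see expandSigned24BitOps below.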
+ bool isSigned24BitOps(CallInst *CI); + void expandSigned24BitOps(CallInst *CI); + // One optimization that can occur is that if the required workgroup size is + // specified then the result of get_local_size is known at compile time and + // can be returned accordingly. + bool isRWGLocalOpt(CallInst *CI); + // On northern island cards, the division is slightly less accurate than on + // previous generations, so we need to utilize a more accurate division. So we + // can translate the accurate divide to a normal divide on all other cards. + bool convertAccurateDivide(CallInst *CI); + void expandAccurateDivide(CallInst *CI); + // If the alignment is set incorrectly, it can produce really inefficient + // code. This checks for this scenario and fixes it if possible. + bool correctMisalignedMemOp(Instruction *inst); + + // If we are in no opt mode, then we need to make sure that + // local samplers are properly propagated as constant propagation + // doesn't occur and we need to know the value of kernel defined + // samplers at compile time. + bool propagateSamplerInst(CallInst *CI); + + LLVMContext *mCTX; + Function *mF; + const AMDILSubtarget *mSTM; + SmallVector< std::pair, 16> atomicFuncs; + SmallVector isConstVec; +}; // class AMDILPeepholeOpt + char AMDILPeepholeOpt::ID = 0; +} // anonymous namespace + +namespace llvm { + FunctionPass * + createAMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL) + { + return new AMDILPeepholeOpt(tm AMDIL_OPT_LEVEL_VAR); + } +} // llvm namespace + +AMDILPeepholeOpt::AMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL) + : FunctionPass(ID), TM(tm) +{ + mDebug = DEBUGME; + optLevel = TM.getOptLevel(); + +} + +AMDILPeepholeOpt::~AMDILPeepholeOpt() +{ +} + +const char * +AMDILPeepholeOpt::getPassName() const +{ + return "AMDIL PeepHole Optimization Pass"; +} + +bool +containsPointerType(Type *Ty) +{ + if (!Ty) { + return false; + } + switch(Ty->getTypeID()) { + default: + return false; + case Type::StructTyID: { + const StructType *ST = dyn_cast(Ty); + for (StructType::element_iterator stb = ST->element_begin(), + ste = ST->element_end(); stb != ste; ++stb) { + if (!containsPointerType(*stb)) { + continue; + } + return true; + } + break; + } + case Type::VectorTyID: + case Type::ArrayTyID: + return containsPointerType(dyn_cast(Ty)->getElementType()); + case Type::PointerTyID: + return true; + }; + return false; +} + +bool +AMDILPeepholeOpt::dumpAllIntoArena(Function &F) +{ + bool dumpAll = false; + for (Function::const_arg_iterator cab = F.arg_begin(), + cae = F.arg_end(); cab != cae; ++cab) { + const Argument *arg = cab; + const PointerType *PT = dyn_cast(arg->getType()); + if (!PT) { + continue; + } + Type *DereferencedType = PT->getElementType(); + if (!dyn_cast(DereferencedType) + ) { + continue; + } + if (!containsPointerType(DereferencedType)) { + continue; + } + // FIXME: Because a pointer inside of a struct/union may be aliased to + // another pointer we need to take the conservative approach and place all + // pointers into the arena until more advanced detection is implemented. + dumpAll = true; + } + return dumpAll; +} +void +AMDILPeepholeOpt::doIsConstCallConversionIfNeeded() +{ + if (isConstVec.empty()) { + return; + } + for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) { + CallInst *CI = isConstVec[x]; + Constant *CV = dyn_cast(CI->getOperand(0)); + Type *aType = Type::getInt32Ty(*mCTX); + Value *Val = (CV != NULL) ? 
ConstantInt::get(aType, 1) + : ConstantInt::get(aType, 0); + CI->replaceAllUsesWith(Val); + CI->eraseFromParent(); + } + isConstVec.clear(); +} +void +AMDILPeepholeOpt::doAtomicConversionIfNeeded(Function &F) +{ + // Don't do anything if we don't have any atomic operations. + if (atomicFuncs.empty()) { + return; + } + // Change the function name for the atomic if it is required + uint32_t size = atomicFuncs.size(); + for (uint32_t x = 0; x < size; ++x) { + atomicFuncs[x].first->setOperand( + atomicFuncs[x].first->getNumOperands()-1, + atomicFuncs[x].second); + + } + mChanged = true; + if (mConvertAtomics) { + return; + } +} + +bool +AMDILPeepholeOpt::runOnFunction(Function &MF) +{ + mChanged = false; + mF = &MF; + mSTM = &TM.getSubtarget(); + if (mDebug) { + MF.dump(); + } + mCTX = &MF.getType()->getContext(); + mConvertAtomics = true; + safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(), + std::bind1st(std::mem_fun(&AMDILPeepholeOpt::instLevelOptimizations), + this)); + + doAtomicConversionIfNeeded(MF); + doIsConstCallConversionIfNeeded(); + + if (mDebug) { + MF.dump(); + } + return mChanged; +} + +bool +AMDILPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb) +{ + Instruction *inst = (*bbb); + CallInst *CI = dyn_cast(inst); + if (!CI) { + return false; + } + if (isSigned24BitOps(CI)) { + expandSigned24BitOps(CI); + ++(*bbb); + CI->eraseFromParent(); + return true; + } + if (propagateSamplerInst(CI)) { + return false; + } + if (expandBFI(CI) || expandBFM(CI)) { + ++(*bbb); + CI->eraseFromParent(); + return true; + } + if (convertAccurateDivide(CI)) { + expandAccurateDivide(CI); + ++(*bbb); + CI->eraseFromParent(); + return true; + } + + StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName(); + if (calleeName.startswith("__amdil_is_constant")) { + // If we do not have optimizations, then this + // cannot be properly evaluated, so we add the + // call instruction to a vector and process + // them at the end of processing after the + // samplers have been correctly handled. + if (optLevel == CodeGenOpt::None) { + isConstVec.push_back(CI); + return false; + } else { + Constant *CV = dyn_cast(CI->getOperand(0)); + Type *aType = Type::getInt32Ty(*mCTX); + Value *Val = (CV != NULL) ? 
ConstantInt::get(aType, 1) + : ConstantInt::get(aType, 0); + CI->replaceAllUsesWith(Val); + ++(*bbb); + CI->eraseFromParent(); + return true; + } + } + + if (calleeName.equals("__amdil_is_asic_id_i32")) { + ConstantInt *CV = dyn_cast(CI->getOperand(0)); + Type *aType = Type::getInt32Ty(*mCTX); + Value *Val = CV; + if (Val) { + Val = ConstantInt::get(aType, + mSTM->device()->getDeviceFlag() & CV->getZExtValue()); + } else { + Val = ConstantInt::get(aType, 0); + } + CI->replaceAllUsesWith(Val); + ++(*bbb); + CI->eraseFromParent(); + return true; + } + Function *F = dyn_cast(CI->getOperand(CI->getNumOperands()-1)); + if (!F) { + return false; + } + if (F->getName().startswith("__atom") && !CI->getNumUses() + && F->getName().find("_xchg") == StringRef::npos) { + std::string buffer(F->getName().str() + "_noret"); + F = dyn_cast( + F->getParent()->getOrInsertFunction(buffer, F->getFunctionType())); + atomicFuncs.push_back(std::make_pair (CI, F)); + } + + if (!mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment) + && !mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV)) { + return false; + } + if (!mConvertAtomics) { + return false; + } + StringRef name = F->getName(); + if (name.startswith("__atom") && name.find("_g") != StringRef::npos) { + mConvertAtomics = false; + } + return false; +} + +bool +AMDILPeepholeOpt::setupBitInsert(Instruction *base, + Instruction *&src, + Constant *&mask, + Constant *&shift) +{ + if (!base) { + if (mDebug) { + dbgs() << "Null pointer passed into function.\n"; + } + return false; + } + bool andOp = false; + if (base->getOpcode() == Instruction::Shl) { + shift = dyn_cast(base->getOperand(1)); + } else if (base->getOpcode() == Instruction::And) { + mask = dyn_cast(base->getOperand(1)); + andOp = true; + } else { + if (mDebug) { + dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n"; + } + // If the base is neither a Shl or a And, we don't fit any of the patterns above. + return false; + } + src = dyn_cast(base->getOperand(0)); + if (!src) { + if (mDebug) { + dbgs() << "Failed setup since the base operand is not an instruction!\n"; + } + return false; + } + // If we find an 'and' operation, then we don't need to + // find the next operation as we already know the + // bits that are valid at this point. + if (andOp) { + return true; + } + if (src->getOpcode() == Instruction::Shl && !shift) { + shift = dyn_cast(src->getOperand(1)); + src = dyn_cast(src->getOperand(0)); + } else if (src->getOpcode() == Instruction::And && !mask) { + mask = dyn_cast(src->getOperand(1)); + } + if (!mask && !shift) { + if (mDebug) { + dbgs() << "Failed setup since both mask and shift are NULL!\n"; + } + // Did not find a constant mask or a shift. + return false; + } + return true; +} +bool +AMDILPeepholeOpt::optimizeBitInsert(Instruction *inst) +{ + if (!inst) { + return false; + } + if (!inst->isBinaryOp()) { + return false; + } + if (inst->getOpcode() != Instruction::Or) { + return false; + } + if (optLevel == CodeGenOpt::None) { + return false; + } + // We want to do an optimization on a sequence of ops that in the end equals a + // single ISA instruction. 
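+  // (On hardware that supports it, that instruction is the
+  // __amdil_ubit_insert intrinsic created at the end of this function.)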
+ // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F) + // Some simplified versions of this pattern are as follows: + // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0 + // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E + // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B + // (A & B) | (D << F) when (1 << F) >= B + // (A << C) | (D & E) when (1 << C) >= E + if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { + // The HD4XXX hardware doesn't support the ubit_insert instruction. + return false; + } + Type *aType = inst->getType(); + bool isVector = aType->isVectorTy(); + int numEle = 1; + // This optimization only works on 32bit integers. + if (aType->getScalarType() + != Type::getInt32Ty(inst->getContext())) { + return false; + } + if (isVector) { + const VectorType *VT = dyn_cast(aType); + numEle = VT->getNumElements(); + // We currently cannot support more than 4 elements in a intrinsic and we + // cannot support Vec3 types. + if (numEle > 4 || numEle == 3) { + return false; + } + } + // TODO: Handle vectors. + if (isVector) { + if (mDebug) { + dbgs() << "!!! Vectors are not supported yet!\n"; + } + return false; + } + Instruction *LHSSrc = NULL, *RHSSrc = NULL; + Constant *LHSMask = NULL, *RHSMask = NULL; + Constant *LHSShift = NULL, *RHSShift = NULL; + Instruction *LHS = dyn_cast(inst->getOperand(0)); + Instruction *RHS = dyn_cast(inst->getOperand(1)); + if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) { + if (mDebug) { + dbgs() << "Found an OR Operation that failed setup!\n"; + inst->dump(); + if (LHS) { LHS->dump(); } + if (LHSSrc) { LHSSrc->dump(); } + if (LHSMask) { LHSMask->dump(); } + if (LHSShift) { LHSShift->dump(); } + } + // There was an issue with the setup for BitInsert. + return false; + } + if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) { + if (mDebug) { + dbgs() << "Found an OR Operation that failed setup!\n"; + inst->dump(); + if (RHS) { RHS->dump(); } + if (RHSSrc) { RHSSrc->dump(); } + if (RHSMask) { RHSMask->dump(); } + if (RHSShift) { RHSShift->dump(); } + } + // There was an issue with the setup for BitInsert. + return false; + } + if (mDebug) { + dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n"; + dbgs() << "Op: "; inst->dump(); + dbgs() << "LHS: "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; } + dbgs() << "LHS Src: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; } + dbgs() << "LHS Mask: "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; } + dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; } + dbgs() << "RHS: "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; } + dbgs() << "RHS Src: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; } + dbgs() << "RHS Mask: "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; } + dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; } + } + Constant *offset = NULL; + Constant *width = NULL; + int32_t lhsMaskVal = 0, rhsMaskVal = 0; + int32_t lhsShiftVal = 0, rhsShiftVal = 0; + int32_t lhsMaskWidth = 0, rhsMaskWidth = 0; + int32_t lhsMaskOffset = 0, rhsMaskOffset = 0; + lhsMaskVal = (int32_t)(LHSMask + ? dyn_cast(LHSMask)->getZExtValue() : 0); + rhsMaskVal = (int32_t)(RHSMask + ? dyn_cast(RHSMask)->getZExtValue() : 0); + lhsShiftVal = (int32_t)(LHSShift + ? dyn_cast(LHSShift)->getZExtValue() : 0); + rhsShiftVal = (int32_t)(RHSShift + ? 
dyn_cast(RHSShift)->getZExtValue() : 0); + lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal; + rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal; + lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal; + rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal; + // TODO: Handle the case of A & B | D & ~B(i.e. inverted masks). + if (mDebug) { + dbgs() << "Found pattern: \'((A" << (LHSMask ? " & B)" : ")"); + dbgs() << (LHSShift ? " << C)" : ")") << " | ((D" ; + dbgs() << (RHSMask ? " & E)" : ")"); + dbgs() << (RHSShift ? " << F)\'\n" : ")\'\n"); + dbgs() << "A = LHSSrc\t\tD = RHSSrc \n"; + dbgs() << "B = " << lhsMaskVal << "\t\tE = " << rhsMaskVal << "\n"; + dbgs() << "C = " << lhsShiftVal << "\t\tF = " << rhsShiftVal << "\n"; + dbgs() << "width(B) = " << lhsMaskWidth; + dbgs() << "\twidth(E) = " << rhsMaskWidth << "\n"; + dbgs() << "offset(B) = " << lhsMaskOffset; + dbgs() << "\toffset(E) = " << rhsMaskOffset << "\n"; + dbgs() << "Constraints: \n"; + dbgs() << "\t(1) B ^ E == 0\n"; + dbgs() << "\t(2-LHS) B is a mask\n"; + dbgs() << "\t(2-LHS) E is a mask\n"; + dbgs() << "\t(3-LHS) (offset(B)) >= (width(E) + offset(E))\n"; + dbgs() << "\t(3-RHS) (offset(E)) >= (width(B) + offset(B))\n"; + } + if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) { + if (mDebug) { + dbgs() << lhsMaskVal << " ^ " << rhsMaskVal; + dbgs() << " = " << (lhsMaskVal ^ rhsMaskVal) << "\n"; + dbgs() << "Failed constraint 1!\n"; + } + return false; + } + if (mDebug) { + dbgs() << "LHS = " << lhsMaskOffset << ""; + dbgs() << " >= (" << rhsMaskWidth << " + " << rhsMaskOffset << ") = "; + dbgs() << (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)); + dbgs() << "\nRHS = " << rhsMaskOffset << ""; + dbgs() << " >= (" << lhsMaskWidth << " + " << lhsMaskOffset << ") = "; + dbgs() << (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)); + dbgs() << "\n"; + } + if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) { + offset = ConstantInt::get(aType, lhsMaskOffset, false); + width = ConstantInt::get(aType, lhsMaskWidth, false); + RHSSrc = RHS; + if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) { + if (mDebug) { + dbgs() << "Value is not a Mask: " << lhsMaskVal << "\n"; + dbgs() << "Failed constraint 2!\n"; + } + return false; + } + if (!LHSShift) { + LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, + "MaskShr", LHS); + } else if (lhsShiftVal != lhsMaskOffset) { + LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, + "MaskShr", LHS); + } + if (mDebug) { + dbgs() << "Optimizing LHS!\n"; + } + } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) { + offset = ConstantInt::get(aType, rhsMaskOffset, false); + width = ConstantInt::get(aType, rhsMaskWidth, false); + LHSSrc = RHSSrc; + RHSSrc = LHS; + if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) { + if (mDebug) { + dbgs() << "Non-Mask: " << rhsMaskVal << "\n"; + dbgs() << "Failed constraint 2!\n"; + } + return false; + } + if (!RHSShift) { + LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, + "MaskShr", RHS); + } else if (rhsShiftVal != rhsMaskOffset) { + LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, + "MaskShr", RHS); + } + if (mDebug) { + dbgs() << "Optimizing RHS!\n"; + } + } else { + if (mDebug) { + dbgs() << "Failed constraint 3!\n"; + } + return false; + } + if (mDebug) { + dbgs() << "Width: "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; } + dbgs() << "Offset: 
"; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; } + dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; } + dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; } + } + if (!offset || !width) { + if (mDebug) { + dbgs() << "Either width or offset are NULL, failed detection!\n"; + } + return false; + } + // Lets create the function signature. + std::vector callTypes; + callTypes.push_back(aType); + callTypes.push_back(aType); + callTypes.push_back(aType); + callTypes.push_back(aType); + FunctionType *funcType = FunctionType::get(aType, callTypes, false); + std::string name = "__amdil_ubit_insert"; + if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; } + Function *Func = + dyn_cast(inst->getParent()->getParent()->getParent()-> + getOrInsertFunction(llvm::StringRef(name), funcType)); + Value *Operands[4] = { + width, + offset, + LHSSrc, + RHSSrc + }; + CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt"); + if (mDebug) { + dbgs() << "Old Inst: "; + inst->dump(); + dbgs() << "New Inst: "; + CI->dump(); + dbgs() << "\n\n"; + } + CI->insertBefore(inst); + inst->replaceAllUsesWith(CI); + return true; +} + +bool +AMDILPeepholeOpt::optimizeBitExtract(Instruction *inst) +{ + if (!inst) { + return false; + } + if (!inst->isBinaryOp()) { + return false; + } + if (inst->getOpcode() != Instruction::And) { + return false; + } + if (optLevel == CodeGenOpt::None) { + return false; + } + // We want to do some simple optimizations on Shift right/And patterns. The + // basic optimization is to turn (A >> B) & C where A is a 32bit type, B is a + // value smaller than 32 and C is a mask. If C is a constant value, then the + // following transformation can occur. For signed integers, it turns into the + // function call dst = __amdil_ibit_extract(log2(C), B, A) For unsigned + // integers, it turns into the function call dst = + // __amdil_ubit_extract(log2(C), B, A) The function __amdil_[u|i]bit_extract + // can be found in Section 7.9 of the ATI IL spec of the stream SDK for + // Evergreen hardware. + if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { + // This does not work on HD4XXX hardware. + return false; + } + Type *aType = inst->getType(); + bool isVector = aType->isVectorTy(); + int numEle = 1; + // This only works on 32bit integers + if (aType->getScalarType() + != Type::getInt32Ty(inst->getContext())) { + return false; + } + if (isVector) { + const VectorType *VT = dyn_cast(aType); + numEle = VT->getNumElements(); + // We currently cannot support more than 4 elements in a intrinsic and we + // cannot support Vec3 types. + if (numEle > 4 || numEle == 3) { + return false; + } + } + BinaryOperator *ShiftInst = dyn_cast(inst->getOperand(0)); + // If the first operand is not a shift instruction, then we can return as it + // doesn't match this pattern. + if (!ShiftInst || !ShiftInst->isShift()) { + return false; + } + // If we are a shift left, then we need don't match this pattern. + if (ShiftInst->getOpcode() == Instruction::Shl) { + return false; + } + bool isSigned = ShiftInst->isArithmeticShift(); + Constant *AndMask = dyn_cast(inst->getOperand(1)); + Constant *ShrVal = dyn_cast(ShiftInst->getOperand(1)); + // Lets make sure that the shift value and the and mask are constant integers. 
+ if (!AndMask || !ShrVal) { + return false; + } + Constant *newMaskConst; + Constant *shiftValConst; + if (isVector) { + // Handle the vector case + std::vector maskVals; + std::vector shiftVals; + ConstantVector *AndMaskVec = dyn_cast(AndMask); + ConstantVector *ShrValVec = dyn_cast(ShrVal); + Type *scalarType = AndMaskVec->getType()->getScalarType(); + assert(AndMaskVec->getNumOperands() == + ShrValVec->getNumOperands() && "cannot have a " + "combination where the number of elements to a " + "shift and an and are different!"); + for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) { + ConstantInt *AndCI = dyn_cast(AndMaskVec->getOperand(x)); + ConstantInt *ShiftIC = dyn_cast(ShrValVec->getOperand(x)); + if (!AndCI || !ShiftIC) { + return false; + } + uint32_t maskVal = (uint32_t)AndCI->getZExtValue(); + if (!isMask_32(maskVal)) { + return false; + } + maskVal = (uint32_t)CountTrailingOnes_32(maskVal); + uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue(); + // If the mask or shiftval is greater than the bitcount, then break out. + if (maskVal >= 32 || shiftVal >= 32) { + return false; + } + // If the mask val is greater than the the number of original bits left + // then this optimization is invalid. + if (maskVal > (32 - shiftVal)) { + return false; + } + maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned)); + shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned)); + } + newMaskConst = ConstantVector::get(maskVals); + shiftValConst = ConstantVector::get(shiftVals); + } else { + // Handle the scalar case + uint32_t maskVal = (uint32_t)dyn_cast(AndMask)->getZExtValue(); + // This must be a mask value where all lower bits are set to 1 and then any + // bit higher is set to 0. + if (!isMask_32(maskVal)) { + return false; + } + maskVal = (uint32_t)CountTrailingOnes_32(maskVal); + // Count the number of bits set in the mask, this is the width of the + // resulting bit set that is extracted from the source value. + uint32_t shiftVal = (uint32_t)dyn_cast(ShrVal)->getZExtValue(); + // If the mask or shift val is greater than the bitcount, then break out. + if (maskVal >= 32 || shiftVal >= 32) { + return false; + } + // If the mask val is greater than the the number of original bits left then + // this optimization is invalid. + if (maskVal > (32 - shiftVal)) { + return false; + } + newMaskConst = ConstantInt::get(aType, maskVal, isSigned); + shiftValConst = ConstantInt::get(aType, shiftVal, isSigned); + } + // Lets create the function signature. + std::vector callTypes; + callTypes.push_back(aType); + callTypes.push_back(aType); + callTypes.push_back(aType); + FunctionType *funcType = FunctionType::get(aType, callTypes, false); + std::string name = "__amdil_ubit_extract"; + if (isVector) { + name += "_v" + itostr(numEle) + "i32"; + } else { + name += "_i32"; + } + // Lets create the function. 
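+  // Worked scalar example: for (A >> 8) & 0xFF the mask yields width
+  // CountTrailingOnes_32(0xFF) = 8 and the shift yields offset 8, so the
+  // call built below is __amdil_ubit_extract_i32(8, 8, A); vector
+  // variants use a _v2i32/_v4i32 suffix instead.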
+ Function *Func = + dyn_cast(inst->getParent()->getParent()->getParent()-> + getOrInsertFunction(llvm::StringRef(name), funcType)); + Value *Operands[3] = { + newMaskConst, + shiftValConst, + ShiftInst->getOperand(0) + }; + // Lets create the Call with the operands + CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt"); + CI->insertBefore(inst); + inst->replaceAllUsesWith(CI); + return true; +} + +bool +AMDILPeepholeOpt::expandBFI(CallInst *CI) +{ + if (!CI || mSTM->calVersion() <= CAL_VERSION_SC_150) { + return false; + } + Value *LHS = CI->getOperand(CI->getNumOperands() - 1); + if (!LHS->getName().startswith("__amdil_bfi")) { + return false; + } + Type* type = CI->getOperand(0)->getType(); + Constant *negOneConst = NULL; + if (type->isVectorTy()) { + std::vector negOneVals; + negOneConst = ConstantInt::get(CI->getContext(), + APInt(32, StringRef("-1"), 10)); + for (size_t x = 0, + y = dyn_cast(type)->getNumElements(); x < y; ++x) { + negOneVals.push_back(negOneConst); + } + negOneConst = ConstantVector::get(negOneVals); + } else { + negOneConst = ConstantInt::get(CI->getContext(), + APInt(32, StringRef("-1"), 10)); + } + // __amdil_bfi => (A & B) | (~A & C) + BinaryOperator *lhs = + BinaryOperator::Create(Instruction::And, CI->getOperand(0), + CI->getOperand(1), "bfi_and", CI); + BinaryOperator *rhs = + BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst, + "bfi_not", CI); + rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2), + "bfi_and", CI); + lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI); + CI->replaceAllUsesWith(lhs); + return true; +} + +bool +AMDILPeepholeOpt::expandBFM(CallInst *CI) +{ + if (!CI || mSTM->calVersion() <= CAL_VERSION_SC_150) { + return false; + } + Value *LHS = CI->getOperand(CI->getNumOperands() - 1); + if (!LHS->getName().startswith("__amdil_bfm")) { + return false; + } + // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f) + Constant *newMaskConst = NULL; + Constant *newShiftConst = NULL; + Type* type = CI->getOperand(0)->getType(); + if (type->isVectorTy()) { + std::vector newMaskVals, newShiftVals; + newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F); + newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1); + for (size_t x = 0, + y = dyn_cast(type)->getNumElements(); x < y; ++x) { + newMaskVals.push_back(newMaskConst); + newShiftVals.push_back(newShiftConst); + } + newMaskConst = ConstantVector::get(newMaskVals); + newShiftConst = ConstantVector::get(newShiftVals); + } else { + newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F); + newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1); + } + BinaryOperator *lhs = + BinaryOperator::Create(Instruction::And, CI->getOperand(0), + newMaskConst, "bfm_mask", CI); + lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst, + lhs, "bfm_shl", CI); + lhs = BinaryOperator::Create(Instruction::Sub, lhs, + newShiftConst, "bfm_sub", CI); + BinaryOperator *rhs = + BinaryOperator::Create(Instruction::And, CI->getOperand(1), + newMaskConst, "bfm_mask", CI); + lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI); + CI->replaceAllUsesWith(lhs); + return true; +} + +bool +AMDILPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb) +{ + Instruction *inst = (*bbb); + if (optimizeCallInst(bbb)) { + return true; + } + if (optimizeBitExtract(inst)) { + return false; + } + if (optimizeBitInsert(inst)) { + return false; + } + if (correctMisalignedMemOp(inst)) { + return false; + } + 
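+  // No instruction-level pattern matched.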
return false; +} +bool +AMDILPeepholeOpt::correctMisalignedMemOp(Instruction *inst) +{ + LoadInst *linst = dyn_cast(inst); + StoreInst *sinst = dyn_cast(inst); + unsigned alignment; + Type* Ty = inst->getType(); + if (linst) { + alignment = linst->getAlignment(); + Ty = inst->getType(); + } else if (sinst) { + alignment = sinst->getAlignment(); + Ty = sinst->getValueOperand()->getType(); + } else { + return false; + } + unsigned size = getTypeSize(Ty); + if (size == alignment || size < alignment) { + return false; + } + if (!Ty->isStructTy()) { + return false; + } + if (alignment < 4) { + if (linst) { + linst->setAlignment(0); + return true; + } else if (sinst) { + sinst->setAlignment(0); + return true; + } + } + return false; +} +bool +AMDILPeepholeOpt::isSigned24BitOps(CallInst *CI) +{ + if (!CI) { + return false; + } + Value *LHS = CI->getOperand(CI->getNumOperands() - 1); + std::string namePrefix = LHS->getName().substr(0, 14); + if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24" + && namePrefix != "__amdil__imul24_high") { + return false; + } + if (mSTM->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) { + return false; + } + return true; +} + +void +AMDILPeepholeOpt::expandSigned24BitOps(CallInst *CI) +{ + assert(isSigned24BitOps(CI) && "Must be a " + "signed 24 bit operation to call this function!"); + Value *LHS = CI->getOperand(CI->getNumOperands()-1); + // On 7XX and 8XX we do not have signed 24bit, so we need to + // expand it to the following: + // imul24 turns into 32bit imul + // imad24 turns into 32bit imad + // imul24_high turns into 32bit imulhigh + if (LHS->getName().substr(0, 14) == "__amdil_imad24") { + Type *aType = CI->getOperand(0)->getType(); + bool isVector = aType->isVectorTy(); + int numEle = isVector ? dyn_cast(aType)->getNumElements() : 1; + std::vector callTypes; + callTypes.push_back(CI->getOperand(0)->getType()); + callTypes.push_back(CI->getOperand(1)->getType()); + callTypes.push_back(CI->getOperand(2)->getType()); + FunctionType *funcType = + FunctionType::get(CI->getOperand(0)->getType(), callTypes, false); + std::string name = "__amdil_imad"; + if (isVector) { + name += "_v" + itostr(numEle) + "i32"; + } else { + name += "_i32"; + } + Function *Func = dyn_cast( + CI->getParent()->getParent()->getParent()-> + getOrInsertFunction(llvm::StringRef(name), funcType)); + Value *Operands[3] = { + CI->getOperand(0), + CI->getOperand(1), + CI->getOperand(2) + }; + CallInst *nCI = CallInst::Create(Func, Operands, "imad24"); + nCI->insertBefore(CI); + CI->replaceAllUsesWith(nCI); + } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") { + BinaryOperator *mulOp = + BinaryOperator::Create(Instruction::Mul, CI->getOperand(0), + CI->getOperand(1), "imul24", CI); + CI->replaceAllUsesWith(mulOp); + } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") { + Type *aType = CI->getOperand(0)->getType(); + + bool isVector = aType->isVectorTy(); + int numEle = isVector ? 
dyn_cast(aType)->getNumElements() : 1; + std::vector callTypes; + callTypes.push_back(CI->getOperand(0)->getType()); + callTypes.push_back(CI->getOperand(1)->getType()); + FunctionType *funcType = + FunctionType::get(CI->getOperand(0)->getType(), callTypes, false); + std::string name = "__amdil_imul_high"; + if (isVector) { + name += "_v" + itostr(numEle) + "i32"; + } else { + name += "_i32"; + } + Function *Func = dyn_cast( + CI->getParent()->getParent()->getParent()-> + getOrInsertFunction(llvm::StringRef(name), funcType)); + Value *Operands[2] = { + CI->getOperand(0), + CI->getOperand(1) + }; + CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high"); + nCI->insertBefore(CI); + CI->replaceAllUsesWith(nCI); + } +} + +bool +AMDILPeepholeOpt::isRWGLocalOpt(CallInst *CI) +{ + return (CI != NULL + && CI->getOperand(CI->getNumOperands() - 1)->getName() + == "__amdil_get_local_size_int"); +} + +bool +AMDILPeepholeOpt::convertAccurateDivide(CallInst *CI) +{ + if (!CI) { + return false; + } + if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD6XXX + && (mSTM->getDeviceName() == "cayman")) { + return false; + } + return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20) + == "__amdil_improved_div"; +} + +void +AMDILPeepholeOpt::expandAccurateDivide(CallInst *CI) +{ + assert(convertAccurateDivide(CI) + && "expanding accurate divide can only happen if it is expandable!"); + BinaryOperator *divOp = + BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0), + CI->getOperand(1), "fdiv32", CI); + CI->replaceAllUsesWith(divOp); +} + +bool +AMDILPeepholeOpt::propagateSamplerInst(CallInst *CI) +{ + if (optLevel != CodeGenOpt::None) { + return false; + } + + if (!CI) { + return false; + } + + unsigned funcNameIdx = 0; + funcNameIdx = CI->getNumOperands() - 1; + StringRef calleeName = CI->getOperand(funcNameIdx)->getName(); + if (calleeName != "__amdil_image2d_read_norm" + && calleeName != "__amdil_image2d_read_unnorm" + && calleeName != "__amdil_image3d_read_norm" + && calleeName != "__amdil_image3d_read_unnorm") { + return false; + } + + unsigned samplerIdx = 2; + samplerIdx = 1; + Value *sampler = CI->getOperand(samplerIdx); + LoadInst *lInst = dyn_cast(sampler); + if (!lInst) { + return false; + } + + if (lInst->getPointerAddressSpace() != AMDILAS::PRIVATE_ADDRESS) { + return false; + } + + GlobalVariable *gv = dyn_cast(lInst->getPointerOperand()); + // If we are loading from what is not a global value, then we + // fail and return. + if (!gv) { + return false; + } + + // If we don't have an initializer or we have an initializer and + // the initializer is not a 32bit integer, we fail. + if (!gv->hasInitializer() + || !gv->getInitializer()->getType()->isIntegerTy(32)) { + return false; + } + + // Now that we have the global variable initializer, lets replace + // all uses of the load instruction with the samplerVal and + // reparse the __amdil_is_constant() function. 
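+  // e.g. a kernel-scope sampler global initialized to a constant i32:
+  // every load of it is folded to that constant here, so the sampler
+  // value is known even at -O0.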
+ Constant *samplerVal = gv->getInitializer(); + lInst->replaceAllUsesWith(samplerVal); + return true; +} + +bool +AMDILPeepholeOpt::doInitialization(Module &M) +{ + return false; +} + +bool +AMDILPeepholeOpt::doFinalization(Module &M) +{ + return false; +} + +void +AMDILPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const +{ + AU.addRequired(); + FunctionPass::getAnalysisUsage(AU); + AU.setPreservesAll(); +} diff --git a/lib/Target/AMDIL/R600CodeEmitter.cpp b/lib/Target/AMDIL/R600CodeEmitter.cpp new file mode 100644 index 00000000000..8faf0deb8c5 --- /dev/null +++ b/lib/Target/AMDIL/R600CodeEmitter.cpp @@ -0,0 +1,749 @@ +//===-- R600CodeEmitter.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPUUtil.h" +#include "AMDILCodeEmitter.h" +#include "AMDILInstrInfo.h" +#include "AMDILUtilityFunctions.h" +#include "R600RegisterInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Target/TargetMachine.h" + +#include + +#define SRC_BYTE_COUNT 11 +#define DST_BYTE_COUNT 5 + +using namespace llvm; + +namespace { + + class R600CodeEmitter : public MachineFunctionPass, public AMDILCodeEmitter { + + private: + + static char ID; + formatted_raw_ostream &_OS; + const TargetMachine * TM; + const MachineRegisterInfo * MRI; + const R600RegisterInfo * TRI; + bool evergreenEncoding; + + bool isReduction; + unsigned reductionElement; + bool isLast; + + unsigned section_start; + + public: + + R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID), + _OS(OS), TM(NULL), evergreenEncoding(false), isReduction(false), + isLast(true) { } + + const char *getPassName() const { return "AMDGPU Machine Code Emitter"; } + + bool runOnMachineFunction(MachineFunction &MF); + virtual uint64_t getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const; + + private: + + void emitALUInstr(MachineInstr &MI); + void emitSrc(const MachineOperand & MO); + void emitDst(const MachineOperand & MO); + void emitALU(MachineInstr &MI, unsigned numSrc); + void emitTexInstr(MachineInstr &MI); + void emitFCInstr(MachineInstr &MI); + + unsigned int getHWInst(const MachineInstr &MI); + + void emitNullBytes(unsigned int byteCount); + + void emitByte(unsigned int byte); + + void emitTwoBytes(uint32_t bytes); + + void emit(uint32_t value); + void emit(uint64_t value); + + unsigned getHWReg(unsigned regNo) const; + + unsigned getElement(unsigned regNo); + +}; + +} /* End anonymous namespace */ + +#define WRITE_MASK_X 0x1 +#define WRITE_MASK_Y 0x2 +#define WRITE_MASK_Z 0x4 +#define WRITE_MASK_W 0x8 + +enum RegElement { + ELEMENT_X = 0, + ELEMENT_Y, + ELEMENT_Z, + ELEMENT_W +}; + +enum InstrTypes { + INSTR_ALU = 0, + INSTR_TEX, + INSTR_FC, + INSTR_NATIVE, + INSTR_VTX +}; + +enum FCInstr { + FC_IF = 0, + FC_ELSE, + FC_ENDIF, + FC_BGNLOOP, + FC_ENDLOOP, + FC_BREAK, + FC_BREAK_NZ_INT, + FC_CONTINUE, + FC_BREAK_Z_INT +}; + +enum TextureTypes { + TEXTURE_1D = 1, + TEXTURE_2D, + TEXTURE_3D, + TEXTURE_CUBE, + TEXTURE_RECT, + TEXTURE_SHADOW1D, 
+ TEXTURE_SHADOW2D, + TEXTURE_SHADOWRECT, + TEXTURE_1D_ARRAY, + TEXTURE_2D_ARRAY, + TEXTURE_SHADOW1D_ARRAY, + TEXTURE_SHADOW2D_ARRAY +}; + +char R600CodeEmitter::ID = 0; + +FunctionPass *llvm::createR600CodeEmitterPass(formatted_raw_ostream &OS) { + return new R600CodeEmitter(OS); +} + +bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) { + + TM = &MF.getTarget(); + MRI = &MF.getRegInfo(); + TRI = static_cast(TM->getRegisterInfo()); + const AMDILSubtarget &STM = TM->getSubtarget(); + std::string gpu = STM.getDeviceName(); + if (!gpu.compare(0,3, "rv7")) { + evergreenEncoding = false; + } else { + evergreenEncoding = true; + } + const AMDGPUTargetMachine *amdtm = + static_cast(&MF.getTarget()); + + if (amdtm->shouldDumpCode()) { + MF.dump(); + } + + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); + BB != BB_E; ++BB) { + MachineBasicBlock &MBB = *BB; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) { + MachineInstr &MI = *I; + if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg() && MI.getOperand(0).isDead()) { + continue; + } + if (isTexOp(MI.getOpcode())) { + emitTexInstr(MI); + } else if (isFCOp(MI.getOpcode())){ + emitFCInstr(MI); + } else if (isReductionOp(MI.getOpcode())) { + isReduction = true; + isLast = false; + for (reductionElement = 0; reductionElement < 4; reductionElement++) { + isLast = (reductionElement == 3); + emitALUInstr(MI); + } + isReduction = false; + } else if (MI.getOpcode() == AMDIL::RETURN || + MI.getOpcode() == AMDIL::BUNDLE || + MI.getOpcode() == AMDIL::KILL) { + continue; + } else { + switch(MI.getOpcode()) { + case AMDIL::RAT_WRITE_CACHELESS_eg: + { + /* XXX: Support for autoencoding 64-bit instructions was added + * in LLVM 3.1. Until we drop support for 3.0, we will use Magic + * numbers for the high bits. */ + uint64_t high = 0x95c0100000000000; + uint64_t inst = getBinaryCodeForInstr(MI); + inst |= high; + /* Set End Of Program bit */ + /* XXX: Need better check of end of program. EOP should be + * encoded in one of the operands of the MI, and it should be + * set in a prior pass. */ + MachineBasicBlock::iterator NextI = llvm::next(I); + MachineInstr &NextMI = *NextI; + if (NextMI.getOpcode() == AMDIL::RETURN) { + inst |= (((uint64_t)1) << 53); + } + emitByte(INSTR_NATIVE); + emit(inst); + break; + } + case AMDIL::VTX_READ_eg: + { + emitByte(INSTR_VTX); + /* inst */ + emitByte(0); + + /* fetch_type */ + emitByte(2); + + /* buffer_id */ + emitByte(MI.getOperand(2).getImm()); + + /* src_gpr */ + emitByte(getHWReg(MI.getOperand(1).getReg())); + + /* src_sel_x */ + emitByte(TRI->getHWRegChan(MI.getOperand(1).getReg())); + + /* mega_fetch_count */ + emitByte(3); + + /* dst_gpr */ + emitByte(getHWReg(MI.getOperand(0).getReg())); + + /* dst_sel_x */ + emitByte(0); + + /* dst_sel_y */ + emitByte(7); + + /* dst_sel_z */ + emitByte(7); + + /* dst_sel_w */ + emitByte(7); + + /* use_const_fields */ + emitByte(1); + + /* data_format */ + emitByte(0); + + /* num_format_all */ + emitByte(0); + + /* format_comp_all */ + emitByte(0); + + /* srf_mode_all */ + emitByte(0); + + /* offset */ + emitByte(0); + + /* endian */ + emitByte(0); + break; + } + + default: + emitALUInstr(MI); + break; + } + } + } + } + return false; +} + +void R600CodeEmitter::emitALUInstr(MachineInstr &MI) +{ + + unsigned numOperands = MI.getNumExplicitOperands(); + + /* Some instructions are just place holder instructions that represent + * operations that the GPU does automatically. They should be ignored. 
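+   * For a real ALU op the record emitted below is: one instruction-type
+   * byte, three 11-byte source blocks (unused slots zero-padded), a
+   * 5-byte destination block, and the trailing ALU control bytes.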
*/ + if (isPlaceHolderOpcode(MI.getOpcode())) { + return; + } + + /* We need to handle some opcodes differently */ + switch (MI.getOpcode()) { + default: break; + + /* XXX: Temp Hack */ + case AMDIL::STORE_OUTPUT: + numOperands = 2; + break; + } + + /* XXX Check if instruction writes a result */ + if (numOperands < 1) { + return; + } + const MachineOperand dstOp = MI.getOperand(0); + + /* Emit instruction type */ + emitByte(0); + + unsigned int opIndex; + for (opIndex = 1; opIndex < numOperands; opIndex++) { + /* Literal constants are always stored as the last operand. */ + if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) { + break; + } + emitSrc(MI.getOperand(opIndex)); + } + + /* Emit zeros for unused sources */ + for ( ; opIndex < 4; opIndex++) { + emitNullBytes(SRC_BYTE_COUNT); + } + + emitDst(dstOp); + + emitALU(MI, numOperands - 1); +} + +void R600CodeEmitter::emitSrc(const MachineOperand & MO) +{ + uint32_t value = 0; + /* Emit the source select (2 bytes). For GPRs, this is the register index. + * For other potential instruction operands, (e.g. constant registers) the + * value of the source select is defined in the r600isa docs. */ + if (MO.isReg()) { + unsigned reg = MO.getReg(); + emitTwoBytes(getHWReg(reg)); + if (reg == AMDIL::ALU_LITERAL_X) { + const MachineInstr * parent = MO.getParent(); + unsigned immOpIndex = parent->getNumExplicitOperands() - 1; + MachineOperand immOp = parent->getOperand(immOpIndex); + if (immOp.isFPImm()) { + value = immOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue(); + } else { + assert(immOp.isImm()); + value = immOp.getImm(); + } + } + } else { + /* XXX: Handle other operand types. */ + emitTwoBytes(0); + } + + /* Emit the source channel (1 byte) */ + if (isReduction) { + emitByte(reductionElement); + } else if (MO.isReg()) { + emitByte(TRI->getHWRegChan(MO.getReg())); + } else { + emitByte(0); + } + + /* XXX: Emit isNegated (1 byte) */ + if ((!(MO.getTargetFlags() & MO_FLAG_ABS)) + && (MO.getTargetFlags() & MO_FLAG_NEG || + (MO.isReg() && + (MO.getReg() == AMDIL::NEG_ONE || MO.getReg() == AMDIL::NEG_HALF)))){ + emitByte(1); + } else { + emitByte(0); + } + + /* Emit isAbsolute (1 byte) */ + if (MO.getTargetFlags() & MO_FLAG_ABS) { + emitByte(1); + } else { + emitByte(0); + } + + /* XXX: Emit relative addressing mode (1 byte) */ + emitByte(0); + + /* Emit kc_bank, This will be adjusted later by r600_asm */ + emitByte(0); + + /* Emit the literal value, if applicable (4 bytes). */ + emit(value); + +} + +void R600CodeEmitter::emitDst(const MachineOperand & MO) +{ + if (MO.isReg()) { + /* Emit the destination register index (1 byte) */ + emitByte(getHWReg(MO.getReg())); + + /* Emit the element of the destination register (1 byte)*/ + if (isReduction) { + emitByte(reductionElement); + } else { + emitByte(TRI->getHWRegChan(MO.getReg())); + } + + /* Emit isClamped (1 byte) */ + if (MO.getTargetFlags() & MO_FLAG_CLAMP) { + emitByte(1); + } else { + emitByte(0); + } + + /* Emit writemask (1 byte). */ + if ((isReduction && reductionElement != TRI->getHWRegChan(MO.getReg())) + || MO.getTargetFlags() & MO_FLAG_MASK) { + emitByte(0); + } else { + emitByte(1); + } + + /* XXX: Emit relative addressing mode */ + emitByte(0); + } else { + /* XXX: Handle other operand types. Are there any for destination regs? 
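+     * For now any non-register destination is padded with DST_BYTE_COUNT
+     * zero bytes so the record keeps its fixed size.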
*/ + emitNullBytes(DST_BYTE_COUNT); + } +} + +void R600CodeEmitter::emitALU(MachineInstr &MI, unsigned numSrc) +{ + /* Emit the instruction (2 bytes) */ + emitTwoBytes(getHWInst(MI)); + + /* Emit isLast (for this instruction group) (1 byte) */ + if (isLast) { + emitByte(1); + } else { + emitByte(0); + } + /* Emit isOp3 (1 byte) */ + if (numSrc == 3) { + emitByte(1); + } else { + emitByte(0); + } + + /* XXX: Emit predicate (1 byte) */ + emitByte(0); + + /* XXX: Emit bank swizzle. (1 byte) Do we need this? It looks like + * r600_asm.c sets it. */ + emitByte(0); + + /* XXX: Emit bank_swizzle_force (1 byte) Not sure what this is for. */ + emitByte(0); + + /* XXX: Emit OMOD (1 byte) Not implemented. */ + emitByte(0); + + /* XXX: Emit index_mode. I think this is for indirect addressing, so we + * don't need to worry about it. */ + emitByte(0); +} + +void R600CodeEmitter::emitTexInstr(MachineInstr &MI) +{ + + int64_t sampler = MI.getOperand(2).getImm(); + int64_t textureType = MI.getOperand(3).getImm(); + unsigned opcode = MI.getOpcode(); + unsigned srcSelect[4] = {0, 1, 2, 3}; + + /* Emit instruction type */ + emitByte(1); + + /* Emit instruction */ + emitByte(getHWInst(MI)); + + /* XXX: Emit resource id r600_shader.c uses sampler + 1. Why? */ + emitByte(sampler + 1 + 1); + + /* Emit source register */ + emitByte(getHWReg(MI.getOperand(1).getReg())); + + /* XXX: Emit src isRelativeAddress */ + emitByte(0); + + /* Emit destination register */ + emitByte(getHWReg(MI.getOperand(0).getReg())); + + /* XXX: Emit dst isRealtiveAddress */ + emitByte(0); + + /* XXX: Emit dst select */ + emitByte(0); /* X */ + emitByte(1); /* Y */ + emitByte(2); /* Z */ + emitByte(3); /* W */ + + /* XXX: Emit lod bias */ + emitByte(0); + + /* XXX: Emit coord types */ + unsigned coordType[4] = {1, 1, 1, 1}; + + if (textureType == TEXTURE_RECT + || textureType == TEXTURE_SHADOWRECT) { + coordType[ELEMENT_X] = 0; + coordType[ELEMENT_Y] = 0; + } + + if (textureType == TEXTURE_1D_ARRAY + || textureType == TEXTURE_SHADOW1D_ARRAY) { + if (opcode == AMDIL::TEX_SAMPLE_C_L || opcode == AMDIL::TEX_SAMPLE_C_LB) { + coordType[ELEMENT_Y] = 0; + } else { + coordType[ELEMENT_Z] = 0; + srcSelect[ELEMENT_Z] = ELEMENT_Y; + } + } else if (textureType == TEXTURE_2D_ARRAY + || textureType == TEXTURE_SHADOW2D_ARRAY) { + coordType[ELEMENT_Z] = 0; + } + + for (unsigned i = 0; i < 4; i++) { + emitByte(coordType[i]); + } + + /* XXX: Emit offsets */ + emitByte(0); /* X */ + emitByte(0); /* Y */ + emitByte(0); /* Z */ + /* There is no OFFSET_W */ + + /* Emit sampler id */ + emitByte(sampler); + + /* XXX:Emit source select */ + if ((textureType == TEXTURE_SHADOW1D + || textureType == TEXTURE_SHADOW2D + || textureType == TEXTURE_SHADOWRECT + || textureType == TEXTURE_SHADOW1D_ARRAY) + && opcode != AMDIL::TEX_SAMPLE_C_L + && opcode != AMDIL::TEX_SAMPLE_C_LB) { + srcSelect[ELEMENT_W] = ELEMENT_Z; + } + + for (unsigned i = 0; i < 4; i++) { + emitByte(srcSelect[i]); + } +} + +void R600CodeEmitter::emitFCInstr(MachineInstr &MI) +{ + /* Emit instruction type */ + emitByte(INSTR_FC); + + /* Emit SRC */ + unsigned numOperands = MI.getNumOperands(); + if (numOperands > 0) { + assert(numOperands == 1); + emitSrc(MI.getOperand(0)); + } else { + emitNullBytes(SRC_BYTE_COUNT); + } + + /* Emit FC Instruction */ + enum FCInstr instr; + switch (MI.getOpcode()) { + case AMDIL::BREAK_LOGICALZ_f32: + instr = FC_BREAK; + break; + case AMDIL::BREAK_LOGICALNZ_i32: + instr = FC_BREAK_NZ_INT; + break; + case AMDIL::BREAK_LOGICALZ_i32: + instr = FC_BREAK_Z_INT; + break; + case 
AMDIL::CONTINUE_LOGICALNZ_f32: + instr = FC_CONTINUE; + break; + /* XXX: This assumes that all IFs will be if (x != 0). If we add + * optimizations this might not be the case */ + case AMDIL::IF_LOGICALNZ_f32: + case AMDIL::IF_LOGICALNZ_i32: + instr = FC_IF; + break; + case AMDIL::IF_LOGICALZ_f32: + abort(); + break; + case AMDIL::ELSE: + instr = FC_ELSE; + break; + case AMDIL::ENDIF: + instr = FC_ENDIF; + break; + case AMDIL::ENDLOOP: + instr = FC_ENDLOOP; + break; + case AMDIL::WHILELOOP: + instr = FC_BGNLOOP; + break; + default: + abort(); + break; + } + emitByte(instr); +} + +#define INSTR_FLOAT2_V(inst, hw) \ + case AMDIL:: inst##_v4f32: \ + case AMDIL:: inst##_v2f32: return HW_INST2(hw); + +#define INSTR_FLOAT2_S(inst, hw) \ + case AMDIL:: inst##_f32: return HW_INST2(hw); + +#define INSTR_FLOAT2(inst, hw) \ + INSTR_FLOAT2_V(inst, hw) \ + INSTR_FLOAT2_S(inst, hw) + +unsigned int R600CodeEmitter::getHWInst(const MachineInstr &MI) +{ + + /* XXX: Lower these to MOV before the code emitter. */ + switch (MI.getOpcode()) { + case AMDIL::STORE_OUTPUT: + case AMDIL::VCREATE_v4i32: + case AMDIL::LOADCONST_i32: + case AMDIL::LOADCONST_f32: + case AMDIL::MOVE_v4i32: + /* Instructons to reinterpret bits as ... */ + case AMDIL::IL_ASINT_f32: + case AMDIL::IL_ASINT_i32: + case AMDIL::IL_ASFLOAT_f32: + case AMDIL::IL_ASFLOAT_i32: + return 0x19; + + default: + return getBinaryCodeForInstr(MI); + } +} + +void R600CodeEmitter::emitNullBytes(unsigned int byteCount) +{ + for (unsigned int i = 0; i < byteCount; i++) { + emitByte(0); + } +} + +void R600CodeEmitter::emitByte(unsigned int byte) +{ + _OS.write((uint8_t) byte & 0xff); +} +void R600CodeEmitter::emitTwoBytes(unsigned int bytes) +{ + _OS.write((uint8_t) (bytes & 0xff)); + _OS.write((uint8_t) ((bytes >> 8) & 0xff)); +} + +void R600CodeEmitter::emit(uint32_t value) +{ + for (unsigned i = 0; i < 4; i++) { + _OS.write((uint8_t) ((value >> (8 * i)) & 0xff)); + } +} + +void R600CodeEmitter::emit(uint64_t value) +{ + for (unsigned i = 0; i < 8; i++) { + emitByte((value >> (8 * i)) & 0xff); + } +} + +unsigned R600CodeEmitter::getHWReg(unsigned regNo) const +{ + unsigned hwReg; + + hwReg = TRI->getHWRegIndex(regNo); + if (AMDIL::R600_CReg32RegClass.contains(regNo)) { + hwReg += 512; + } + return hwReg; +} + +uint64_t R600CodeEmitter::getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const +{ + if (MO.isReg()) { + return getHWReg(MO.getReg()); + } else { + return MO.getImm(); + } +} + + +RegElement maskBitToElement(unsigned int maskBit) +{ + switch (maskBit) { + case WRITE_MASK_X: return ELEMENT_X; + case WRITE_MASK_Y: return ELEMENT_Y; + case WRITE_MASK_Z: return ELEMENT_Z; + case WRITE_MASK_W: return ELEMENT_W; + default: + assert("Invalid maskBit"); + return ELEMENT_X; + } +} + +unsigned int dstSwizzleToWriteMask(unsigned swizzle) +{ + switch(swizzle) { + default: + case AMDIL_DST_SWIZZLE_DEFAULT: + return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W; + case AMDIL_DST_SWIZZLE_X___: + return WRITE_MASK_X; + case AMDIL_DST_SWIZZLE_XY__: + return WRITE_MASK_X | WRITE_MASK_Y; + case AMDIL_DST_SWIZZLE_XYZ_: + return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z; + case AMDIL_DST_SWIZZLE_XYZW: + return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W; + case AMDIL_DST_SWIZZLE__Y__: + return WRITE_MASK_Y; + case AMDIL_DST_SWIZZLE__YZ_: + return WRITE_MASK_Y | WRITE_MASK_Z; + case AMDIL_DST_SWIZZLE__YZW: + return WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W; + case AMDIL_DST_SWIZZLE___Z_: + return WRITE_MASK_Z; + case 
+
+
+RegElement maskBitToElement(unsigned int maskBit)
+{
+  switch (maskBit) {
+  case WRITE_MASK_X: return ELEMENT_X;
+  case WRITE_MASK_Y: return ELEMENT_Y;
+  case WRITE_MASK_Z: return ELEMENT_Z;
+  case WRITE_MASK_W: return ELEMENT_W;
+  default:
+    assert(!"Invalid maskBit");
+    return ELEMENT_X;
+  }
+}
+
+unsigned int dstSwizzleToWriteMask(unsigned swizzle)
+{
+  switch(swizzle) {
+  default:
+  case AMDIL_DST_SWIZZLE_DEFAULT:
+    return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W;
+  case AMDIL_DST_SWIZZLE_X___:
+    return WRITE_MASK_X;
+  case AMDIL_DST_SWIZZLE_XY__:
+    return WRITE_MASK_X | WRITE_MASK_Y;
+  case AMDIL_DST_SWIZZLE_XYZ_:
+    return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z;
+  case AMDIL_DST_SWIZZLE_XYZW:
+    return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W;
+  case AMDIL_DST_SWIZZLE__Y__:
+    return WRITE_MASK_Y;
+  case AMDIL_DST_SWIZZLE__YZ_:
+    return WRITE_MASK_Y | WRITE_MASK_Z;
+  case AMDIL_DST_SWIZZLE__YZW:
+    return WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W;
+  case AMDIL_DST_SWIZZLE___Z_:
+    return WRITE_MASK_Z;
+  case AMDIL_DST_SWIZZLE___ZW:
+    return WRITE_MASK_Z | WRITE_MASK_W;
+  case AMDIL_DST_SWIZZLE____W:
+    return WRITE_MASK_W;
+  case AMDIL_DST_SWIZZLE_X_ZW:
+    return WRITE_MASK_X | WRITE_MASK_Z | WRITE_MASK_W;
+  case AMDIL_DST_SWIZZLE_XY_W:
+    return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_W;
+  case AMDIL_DST_SWIZZLE_X_Z_:
+    return WRITE_MASK_X | WRITE_MASK_Z;
+  case AMDIL_DST_SWIZZLE_X__W:
+    return WRITE_MASK_X | WRITE_MASK_W;
+  case AMDIL_DST_SWIZZLE__Y_W:
+    return WRITE_MASK_Y | WRITE_MASK_W;
+  }
+}
+
+#include "AMDILGenCodeEmitter.inc"
+
diff --git a/lib/Target/AMDIL/R600KernelParameters.cpp b/lib/Target/AMDIL/R600KernelParameters.cpp
new file mode 100644
index 00000000000..3fdf48a2bf2
--- /dev/null
+++ b/lib/Target/AMDIL/R600KernelParameters.cpp
@@ -0,0 +1,503 @@
+//===-- R600KernelParameters.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#include <llvm-c/Core.h>
+#include "R600KernelParameters.h"
+#include "R600OpenCLUtils.h"
+#include "llvm/Constants.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/TypeBuilder.h"
+// #include "llvm/CodeGen/Function.h"
+
+namespace AMDILAS {
+enum AddressSpaces {
+  PRIVATE_ADDRESS  = 0, // Address space for private memory.
+  GLOBAL_ADDRESS   = 1, // Address space for global memory (RAT0, VTX0).
+  CONSTANT_ADDRESS = 2, // Address space for constant memory.
+  LOCAL_ADDRESS    = 3, // Address space for local memory.
+  REGION_ADDRESS   = 4, // Address space for region memory.
+  ADDRESS_NONE     = 5, // Address space for unknown memory.
+  PARAM_D_ADDRESS  = 6, // Address space for directly addressable parameter memory (CONST0).
+  PARAM_I_ADDRESS  = 7, // Address space for indirectly addressable parameter memory (VTX1).
+  LAST_ADDRESS     = 8
+};
+}
+
+
+#include <map>
+#include <set>
+
+using namespace llvm;
+using namespace std;
+
+#define CONSTANT_CACHE_SIZE_DW 127
+
+class R600KernelParameters : public llvm::FunctionPass
+{
+  const llvm::TargetData * TD;
+  LLVMContext* Context;
+  Module *mod;
+
+  struct param
+  {
+    param() : val(NULL), ptr_val(NULL), offset_in_dw(0), size_in_dw(0),
+              indirect(false), specialID(0) {}
+
+    llvm::Value* val;
+    llvm::Value* ptr_val;
+    int offset_in_dw;
+    int size_in_dw;
+
+    bool indirect;
+
+    string specialType;
+    int specialID;
+
+    int end() { return offset_in_dw + size_in_dw; }
+    /* The first 9 dwords are reserved for the grid sizes.
*/ + int get_rat_offset() { return 9 + offset_in_dw; } + }; + + std::vector params; + + int getLastSpecialID(const string& TypeName); + + int getListSize(); + void AddParam(llvm::Argument* arg); + int calculateArgumentSize(llvm::Argument* arg); + void RunAna(llvm::Function* fun); + void Replace(llvm::Function* fun); + bool isIndirect(Value* val, set& visited); + void Propagate(llvm::Function* fun); + void Propagate(llvm::Value* v, const llvm::Twine& name, bool indirect = false); + Value* ConstantRead(Function* fun, param& p); + Value* handleSpecial(Function* fun, param& p); + bool isSpecialType(Type*); + string getSpecialTypeName(Type*); +public: + static char ID; + R600KernelParameters() : FunctionPass(ID) {}; + R600KernelParameters(const llvm::TargetData* TD) : FunctionPass(ID), TD(TD) {} +// bool runOnFunction (llvm::Function &F); + bool runOnFunction (llvm::Function &F); + void getAnalysisUsage(AnalysisUsage &AU) const; + const char *getPassName() const; + bool doInitialization(Module &M); + bool doFinalization(Module &M); +}; + +char R600KernelParameters::ID = 0; + +static RegisterPass X("kerparam", "OpenCL Kernel Parameter conversion", false, false); + +int R600KernelParameters::getLastSpecialID(const string& TypeName) +{ + int lastID = -1; + + for (vector::iterator i = params.begin(); i != params.end(); i++) + { + if (i->specialType == TypeName) + { + lastID = i->specialID; + } + } + + return lastID; +} + +int R600KernelParameters::getListSize() +{ + if (params.size() == 0) + { + return 0; + } + + return params.back().end(); +} + +bool R600KernelParameters::isIndirect(Value* val, set& visited) +{ + if (isa(val)) + { + return false; + } + + if (isa(val->getType())) + { + assert(0 and "Internal error"); + return false; + } + + if (visited.count(val)) + { + return false; + } + + visited.insert(val); + + if (isa(val)) + { + GetElementPtrInst* GEP = dyn_cast(val); + GetElementPtrInst::op_iterator i = GEP->op_begin(); + + for (i++; i != GEP->op_end(); i++) + { + if (!isa(*i)) + { + return true; + } + } + } + + for (Value::use_iterator i = val->use_begin(); i != val->use_end(); i++) + { + Value* v2 = dyn_cast(*i); + + if (v2) + { + if (isIndirect(v2, visited)) + { + return true; + } + } + } + + return false; +} + +void R600KernelParameters::AddParam(llvm::Argument* arg) +{ + param p; + + p.val = dyn_cast(arg); + p.offset_in_dw = getListSize(); + p.size_in_dw = calculateArgumentSize(arg); + + if (isa(arg->getType()) and arg->hasByValAttr()) + { + set visited; + p.indirect = isIndirect(p.val, visited); + } + + params.push_back(p); +} + +int R600KernelParameters::calculateArgumentSize(llvm::Argument* arg) +{ + Type* t = arg->getType(); + + if (arg->hasByValAttr() and dyn_cast(t)) + { + t = dyn_cast(t)->getElementType(); + } + + int store_size_in_dw = (TD->getTypeStoreSize(t) + 3)/4; + + assert(store_size_in_dw); + + return store_size_in_dw; +} + + +void R600KernelParameters::RunAna(llvm::Function* fun) +{ + assert(isOpenCLKernel(fun)); + + for (Function::arg_iterator i = fun->arg_begin(); i != fun->arg_end(); i++) + { + AddParam(i); + } + +} + +void R600KernelParameters::Replace(llvm::Function* fun) +{ + for (std::vector::iterator i = params.begin(); i != params.end(); i++) + { + Value *new_val; + + if (isSpecialType(i->val->getType())) + { + new_val = handleSpecial(fun, *i); + } + else + { + new_val = ConstantRead(fun, *i); + } + if (new_val) + { + i->val->replaceAllUsesWith(new_val); + } + } +} + +void R600KernelParameters::Propagate(llvm::Function* fun) +{ + for (std::vector::iterator i = 
params.begin(); i != params.end(); i++) + { + if (i->ptr_val) + { + Propagate(i->ptr_val, i->val->getName(), i->indirect); + } + } +} + +void R600KernelParameters::Propagate(Value* v, const Twine& name, bool indirect) +{ + LoadInst* load = dyn_cast(v); + GetElementPtrInst *GEP = dyn_cast(v); + + unsigned addrspace; + + if (indirect) + { + addrspace = AMDILAS::PARAM_I_ADDRESS; + } + else + { + addrspace = AMDILAS::PARAM_D_ADDRESS; + } + + if (GEP and GEP->getType()->getAddressSpace() != addrspace) + { + Value* op = GEP->getPointerOperand(); + + if (dyn_cast(op->getType())->getAddressSpace() != addrspace) + { + op = new BitCastInst(op, PointerType::get(dyn_cast(op->getType())->getElementType(), addrspace), name, dyn_cast(v)); + } + + vector params(GEP->idx_begin(), GEP->idx_end()); + + GetElementPtrInst* GEP2 = GetElementPtrInst::Create(op, params, name, dyn_cast(v)); + GEP2->setIsInBounds(GEP->isInBounds()); + v = dyn_cast(GEP2); + GEP->replaceAllUsesWith(GEP2); + GEP->eraseFromParent(); + load = NULL; + } + + if (load) + { + if (load->getPointerAddressSpace() != addrspace) ///normally at this point we have the right address space + { + Value *orig_ptr = load->getPointerOperand(); + PointerType *orig_ptr_type = dyn_cast(orig_ptr->getType()); + + Type* new_ptr_type = PointerType::get(orig_ptr_type->getElementType(), addrspace); + + Value* new_ptr = orig_ptr; + + if (orig_ptr->getType() != new_ptr_type) + { + new_ptr = new BitCastInst(orig_ptr, new_ptr_type, "prop_cast", load); + } + + Value* new_load = new LoadInst(new_ptr, name, load); + load->replaceAllUsesWith(new_load); + load->eraseFromParent(); + } + + return; + } + + vector users(v->use_begin(), v->use_end()); + + for (int i = 0; i < int(users.size()); i++) + { + Value* v2 = dyn_cast(users[i]); + + if (v2) + { + Propagate(v2, name, indirect); + } + } +} + +Value* R600KernelParameters::ConstantRead(Function* fun, param& p) +{ + assert(fun->front().begin() != fun->front().end()); + + Instruction *first_inst = fun->front().begin(); + IRBuilder <> builder (first_inst); +/* First 3 dwords are reserved for the dimmension info */ + + if (!p.val->hasNUsesOrMore(1)) + { + return NULL; + } + unsigned addrspace; + + if (p.indirect) + { + addrspace = AMDILAS::PARAM_I_ADDRESS; + } + else + { + addrspace = AMDILAS::PARAM_D_ADDRESS; + } + + Argument *arg = dyn_cast(p.val); + Type * argType = p.val->getType(); + PointerType * argPtrType = dyn_cast(p.val->getType()); + + if (argPtrType and arg->hasByValAttr()) + { + Value* param_addr_space_ptr = ConstantPointerNull::get(PointerType::get(Type::getInt32Ty(*Context), addrspace)); + Value* param_ptr = GetElementPtrInst::Create(param_addr_space_ptr, ConstantInt::get(Type::getInt32Ty(*Context), p.get_rat_offset()), arg->getName(), first_inst); + param_ptr = new BitCastInst(param_ptr, PointerType::get(argPtrType->getElementType(), addrspace), arg->getName(), first_inst); + p.ptr_val = param_ptr; + return param_ptr; + } + else + { + Value* param_addr_space_ptr = ConstantPointerNull::get(PointerType::get(argType, addrspace)); + + Value* param_ptr = builder.CreateGEP(param_addr_space_ptr, + ConstantInt::get(Type::getInt32Ty(*Context), p.get_rat_offset()), arg->getName()); + + Value* param_value = builder.CreateLoad(param_ptr, arg->getName()); + + return param_value; + } +} + +Value* R600KernelParameters::handleSpecial(Function* fun, param& p) +{ + string name = getSpecialTypeName(p.val->getType()); + int ID; + + assert(!name.empty()); + + if (name == "image2d_t" or name == "image3d_t") + { + int lastID = 
max(getLastSpecialID("image2d_t"), getLastSpecialID("image3d_t")); + + if (lastID == -1) + { + ID = 2; ///ID0 and ID1 are used internally by the driver + } + else + { + ID = lastID + 1; + } + } + else if (name == "sampler_t") + { + int lastID = getLastSpecialID("sampler_t"); + + if (lastID == -1) + { + ID = 0; + } + else + { + ID = lastID + 1; + } + } + else + { + ///TODO: give some error message + return NULL; + } + + p.specialType = name; + p.specialID = ID; + + Instruction *first_inst = fun->front().begin(); + + return new IntToPtrInst(ConstantInt::get(Type::getInt32Ty(*Context), p.specialID), p.val->getType(), "resourceID", first_inst); +} + + +bool R600KernelParameters::isSpecialType(Type* t) +{ + return !getSpecialTypeName(t).empty(); +} + +string R600KernelParameters::getSpecialTypeName(Type* t) +{ + PointerType *pt = dyn_cast(t); + StructType *st = NULL; + + if (pt) + { + st = dyn_cast(pt->getElementType()); + } + + if (st) + { + string prefix = "struct.opencl_builtin_type_"; + + string name = st->getName().str(); + + if (name.substr(0, prefix.length()) == prefix) + { + return name.substr(prefix.length(), name.length()); + } + } + + return ""; +} + + +bool R600KernelParameters::runOnFunction (Function &F) +{ + if (!isOpenCLKernel(&F)) + { + return false; + } + +// F.dump(); + + RunAna(&F); + Replace(&F); + Propagate(&F); + + mod->dump(); + return false; +} + +void R600KernelParameters::getAnalysisUsage(AnalysisUsage &AU) const +{ +// AU.addRequired(); + FunctionPass::getAnalysisUsage(AU); + AU.setPreservesAll(); +} + +const char *R600KernelParameters::getPassName() const +{ + return "OpenCL Kernel parameter conversion to memory"; +} + +bool R600KernelParameters::doInitialization(Module &M) +{ + Context = &M.getContext(); + mod = &M; + + return false; +} + +bool R600KernelParameters::doFinalization(Module &M) +{ + return false; +} + +llvm::FunctionPass* createR600KernelParametersPass(const llvm::TargetData* TD) +{ + FunctionPass *p = new R600KernelParameters(TD); + + return p; +} + + diff --git a/lib/Target/AMDIL/R600KernelParameters.h b/lib/Target/AMDIL/R600KernelParameters.h new file mode 100644 index 00000000000..904a469a5f0 --- /dev/null +++ b/lib/Target/AMDIL/R600KernelParameters.h @@ -0,0 +1,28 @@ +//===-- R600KernelParameters.h - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#ifndef KERNELPARAMETERS_H +#define KERNELPARAMETERS_H + +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Function.h" +#include "llvm/Pass.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Value.h" + +#include + +llvm::FunctionPass* createR600KernelParametersPass(const llvm::TargetData* TD); + + +#endif diff --git a/lib/Target/AMDIL/R600LowerInstructions.cpp b/lib/Target/AMDIL/R600LowerInstructions.cpp new file mode 100644 index 00000000000..fb5431d0eef --- /dev/null +++ b/lib/Target/AMDIL/R600LowerInstructions.cpp @@ -0,0 +1,502 @@ +//===-- R600LowerInstructions.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
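
Before moving on: handleSpecial() in R600KernelParameters.cpp above hands out hardware resource IDs for image and sampler kernel arguments. Images (2D and 3D share one counter) start at ID 2 because ID 0 and ID 1 are used internally by the driver, while samplers start at 0. A standalone sketch of the same policy; the type and member names here are illustrative, not the pass's API:

  #include <string>

  struct ResourceIDAllocator {
    int lastImageID = 1;    /* next image gets 2; 0 and 1 are driver-reserved */
    int lastSamplerID = -1; /* next sampler gets 0 */

    /* Returns the assigned ID, or -1 for types that are not special. */
    int allocate(const std::string &typeName) {
      if (typeName == "image2d_t" || typeName == "image3d_t")
        return ++lastImageID;
      if (typeName == "sampler_t")
        return ++lastSamplerID;
      return -1;
    }
  };
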
+// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPUInstrInfo.h" +#include "AMDGPUUtil.h" +#include "AMDIL.h" +#include "AMDILRegisterInfo.h" +#include "R600InstrInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Constants.h" +#include "llvm/Target/TargetInstrInfo.h" + +#include + +using namespace llvm; + +namespace { + class R600LowerInstructionsPass : public MachineFunctionPass { + + private: + static char ID; + TargetMachine &TM; + const R600InstrInfo * TII; + MachineRegisterInfo * MRI; + + void lowerFLT(MachineInstr &MI); + + void calcAddress(const MachineOperand &ptrOp, + const MachineOperand &indexOp, + unsigned indexReg, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + + void divMod(MachineInstr &MI, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + bool div = true) const; + + public: + R600LowerInstructionsPass(TargetMachine &tm) : + MachineFunctionPass(ID), TM(tm), + TII(static_cast(tm.getInstrInfo())), + MRI(NULL) + { } + + const char *getPassName() const { return "R600 Lower Instructions"; } + virtual bool runOnMachineFunction(MachineFunction &MF); + + }; +} /* End anonymous namespace */ + +char R600LowerInstructionsPass::ID = 0; + +FunctionPass *llvm::createR600LowerInstructionsPass(TargetMachine &tm) { + return new R600LowerInstructionsPass(tm); +} + +bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF) +{ + MRI = &MF.getRegInfo(); + + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); + BB != BB_E; ++BB) { + MachineBasicBlock &MBB = *BB; + for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); + I != MBB.end(); I = Next, Next = llvm::next(I) ) { + + MachineInstr &MI = *I; + switch(MI.getOpcode()) { + case AMDIL::FLT: + BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::FGE)) + .addOperand(MI.getOperand(0)) + .addOperand(MI.getOperand(2)) + .addOperand(MI.getOperand(1)); + break; + + case AMDIL::ABS_i32: + { + unsigned setgt = MRI->createVirtualRegister( + &AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGE_INT), + setgt) + .addOperand(MI.getOperand(1)) + .addReg(AMDIL::ZERO); + + unsigned add_int = MRI->createVirtualRegister( + &AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::ADD_INT), + add_int) + .addReg(setgt) + .addOperand(MI.getOperand(1)); + + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::XOR_INT)) + .addOperand(MI.getOperand(0)) + .addReg(setgt) + .addReg(add_int); + + break; + } + + /* XXX: We could propagate the ABS flag to all of the uses of Operand0 and + * remove the ABS instruction.*/ + case AMDIL::FABS_f32: + case AMDIL::ABS_f32: + MI.getOperand(1).addTargetFlag(MO_FLAG_ABS); + BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::MOVE_f32)) + .addOperand(MI.getOperand(0)) + .addOperand(MI.getOperand(1)); + break; + + case AMDIL::BINARY_OR_f32: + { + unsigned tmp0 = MRI->createVirtualRegister(&AMDIL::GPRI32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::FTOI), tmp0) + .addOperand(MI.getOperand(1)); + unsigned tmp1 = MRI->createVirtualRegister(&AMDIL::GPRI32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), 
TM.getInstrInfo()->get(AMDIL::FTOI), tmp1) + .addOperand(MI.getOperand(2)); + unsigned tmp2 = MRI->createVirtualRegister(&AMDIL::GPRI32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::BINARY_OR_i32), tmp2) + .addReg(tmp0) + .addReg(tmp1); + BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::ITOF), MI.getOperand(0).getReg()) + .addReg(tmp2); + break; + } + case AMDIL::CMOVLOG_f32: + BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(MI.getOpcode())) + .addOperand(MI.getOperand(0)) + .addOperand(MI.getOperand(1)) + .addOperand(MI.getOperand(3)) + .addOperand(MI.getOperand(2)); + break; + + case AMDIL::CMOVLOG_i32: + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT)) + .addOperand(MI.getOperand(0)) + .addOperand(MI.getOperand(1)) + .addOperand(MI.getOperand(3)) + .addOperand(MI.getOperand(2)); + break; + + case AMDIL::CLAMP_f32: + { + MachineOperand lowOp = MI.getOperand(2); + MachineOperand highOp = MI.getOperand(3); + if (lowOp.isReg() && highOp.isReg() + && lowOp.getReg() == AMDIL::ZERO && highOp.getReg() == AMDIL::ONE) { + MI.getOperand(0).addTargetFlag(MO_FLAG_CLAMP); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::MOV)) + .addOperand(MI.getOperand(0)) + .addOperand(MI.getOperand(1)); + } else { + /* XXX: Handle other cases */ + abort(); + } + break; + } + + case AMDIL::UDIV_i32: + divMod(MI, MBB, I); + break; + + /* XXX: Figure out the semantics of DIV_INF_f32 and make sure this is OK */ +/* case AMDIL::DIV_INF_f32: + { + unsigned tmp0 = MRI->createVirtualRegister(&AMDIL::GPRF32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), + TM.getInstrInfo()->get(AMDIL::RECIP_CLAMPED), tmp0) + .addOperand(MI.getOperand(2)); + BuildMI(MBB, I, MBB.findDebugLoc(I), + TM.getInstrInfo()->get(AMDIL::MUL_IEEE_f32)) + .addOperand(MI.getOperand(0)) + .addReg(tmp0) + .addOperand(MI.getOperand(1)); + break; + } +*/ /* XXX: This is an optimization */ + + case AMDIL::GLOBALLOAD_f32: + case AMDIL::GLOBALLOAD_i32: + { + MachineOperand &ptrOperand = MI.getOperand(1); + MachineOperand &indexOperand = MI.getOperand(2); + unsigned indexReg = + MRI->createVirtualRegister(&AMDIL::R600_TReg32_XRegClass); + + /* Calculate the address with in the VTX buffer */ + calcAddress(ptrOperand, indexOperand, indexReg, MBB, I); + + /* Make sure the VTX_READ_eg writes to the X chan */ + MRI->setRegClass(MI.getOperand(0).getReg(), + &AMDIL::R600_TReg32_XRegClass); + + /* Add the VTX_READ_eg instruction */ + BuildMI(MBB, I, MBB.findDebugLoc(I), + TII->get(AMDIL::VTX_READ_eg)) + .addOperand(MI.getOperand(0)) + .addReg(indexReg) + .addImm(1); + break; + } + + case AMDIL::GLOBALSTORE_i32: + case AMDIL::GLOBALSTORE_f32: + { + MachineOperand &ptrOperand = MI.getOperand(1); + MachineOperand &indexOperand = MI.getOperand(2); + unsigned rwReg = + MRI->createVirtualRegister(&AMDIL::R600_TReg32_XRegClass); + unsigned byteIndexReg = + MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + unsigned shiftReg = + MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + unsigned indexReg = + MRI->createVirtualRegister(&AMDIL::R600_TReg32_XRegClass); + + /* Move the store value to the correct register class */ + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::COPY), rwReg) + .addOperand(MI.getOperand(0)); + + /* Calculate the address in the RAT */ + calcAddress(ptrOperand, indexOperand, byteIndexReg, MBB, I); + + + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::MOV), shiftReg) + .addReg(AMDIL::ALU_LITERAL_X) + .addImm(2); + + /* XXX: Check GPU family */ + 
BuildMI(MBB, I, MBB.findDebugLoc(I), + TII->get(AMDIL::LSHR_eg), indexReg) + .addReg(byteIndexReg) + .addReg(shiftReg); + + /* XXX: Check GPU Family */ + BuildMI(MBB, I, MBB.findDebugLoc(I), + TII->get(AMDIL::RAT_WRITE_CACHELESS_eg)) + .addReg(rwReg) + .addReg(indexReg) + .addImm(0); + break; + } + case AMDIL::ILT: + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGT_INT)) + .addOperand(MI.getOperand(0)) + .addOperand(MI.getOperand(2)) + .addOperand(MI.getOperand(1)); + break; + case AMDIL::LOADCONST_f32: + case AMDIL::LOADCONST_i32: + { + bool canInline = false; + unsigned inlineReg; + MachineOperand & dstOp = MI.getOperand(0); + MachineOperand & immOp = MI.getOperand(1); + if (immOp.isFPImm()) { + const ConstantFP * cfp = immOp.getFPImm(); + if (cfp->isZero()) { + canInline = true; + inlineReg = AMDIL::ZERO; + } else if (cfp->isExactlyValue(1.0f)) { + canInline = true; + inlineReg = AMDIL::ONE; + } else if (cfp->isExactlyValue(0.5f)) { + canInline = true; + inlineReg = AMDIL::HALF; + } + } + + if (canInline) { + MachineOperand * use = dstOp.getNextOperandForReg(); + /* The lowering operation for CLAMP needs to have the immediates + * as operands, so we must propagate them. */ + while (use) { + MachineOperand * next = use->getNextOperandForReg(); + if (use->getParent()->getOpcode() == AMDIL::CLAMP_f32) { + use->setReg(inlineReg); + } + use = next; + } + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::COPY)) + .addOperand(dstOp) + .addReg(inlineReg); + } else { + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::MOV)) + .addOperand(dstOp) + .addReg(AMDIL::ALU_LITERAL_X) + .addOperand(immOp); + } + break; + } + + case AMDIL::MASK_WRITE: + { + unsigned maskedRegister = MI.getOperand(0).getReg(); + assert(TargetRegisterInfo::isVirtualRegister(maskedRegister)); + MachineInstr * defInstr = MRI->getVRegDef(maskedRegister); + MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister); + def->addTargetFlag(MO_FLAG_MASK); + break; + } + + case AMDIL::NEGATE_i32: + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT)) + .addOperand(MI.getOperand(0)) + .addReg(AMDIL::ZERO) + .addOperand(MI.getOperand(1)); + break; + + case AMDIL::NEG_f32: + { + MI.getOperand(1).addTargetFlag(MO_FLAG_NEG); + BuildMI(MBB, I, MBB.findDebugLoc(I), + TII->get(TII->getISAOpcode(AMDIL::MOV))) + .addOperand(MI.getOperand(0)) + .addOperand(MI.getOperand(1)); + break; + } + + case AMDIL::SUB_f32: + { + MI.getOperand(2).addTargetFlag(MO_FLAG_NEG); + BuildMI(MBB, I, MBB.findDebugLoc(I), + TII->get(TII->getISAOpcode(AMDIL::ADD_f32))) + .addOperand(MI.getOperand(0)) + .addOperand(MI.getOperand(1)) + .addOperand(MI.getOperand(2)); + break; + } + + default: + continue; + } + MI.eraseFromParent(); + } + } + return false; +} + +void R600LowerInstructionsPass::calcAddress(const MachineOperand &ptrOp, + const MachineOperand &indexOp, + unsigned indexReg, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const +{ + /* Optimize the case where the indexOperand is 0 */ + if (indexOp.isImm() && indexOp.getImm() == 0) { + assert(ptrOp.isReg()); + BuildMI(MBB, I, MBB.findDebugLoc(I), + TII->get(AMDIL::COPY), indexReg) + .addOperand(ptrOp); + } else { + BuildMI(MBB, I, MBB.findDebugLoc(I), + TII->get(AMDIL::ADD_INT), indexReg) + .addOperand(indexOp) + .addOperand(ptrOp); + } +} + +/* Mostly copied from tgsi_divmod() in r600_shader.c */ +void R600LowerInstructionsPass::divMod(MachineInstr &MI, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + bool div) const +{ + unsigned dst = 
MI.getOperand(0).getReg(); + MachineOperand &numerator = MI.getOperand(1); + MachineOperand &denominator = MI.getOperand(2); + /* rcp = RECIP(denominator) = 2^32 / denominator + e + * e is rounding error */ + unsigned rcp = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getRECIP_UINT()), rcp) + .addOperand(denominator); + + /* rcp_lo = lo(rcp * denominator) */ + unsigned rcp_lo = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULLO_UINT()), rcp_lo) + .addReg(rcp) + .addOperand(denominator); + + /* rcp_hi = HI (rcp * denominator) */ + unsigned rcp_hi = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULHI_UINT()), rcp_hi) + .addReg(rcp) + .addOperand(denominator); + + unsigned neg_rcp_lo = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), neg_rcp_lo) + .addReg(AMDIL::ZERO) + .addReg(rcp_lo); + + unsigned abs_rcp_lo = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), abs_rcp_lo) + .addReg(rcp_hi) + .addReg(neg_rcp_lo) + .addReg(rcp_lo); + + unsigned e = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULHI_UINT()), e) + .addReg(abs_rcp_lo) + .addReg(rcp); + + unsigned rcp_plus_e = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::ADD_INT), rcp_plus_e) + .addReg(rcp) + .addReg(e); + + unsigned rcp_sub_e = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), rcp_sub_e) + .addReg(rcp) + .addReg(e); + + /* tmp0 = rcp_hi == 0 ? 
rcp_plus_e : rcp_sub_e */ + unsigned tmp0 = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), tmp0) + .addReg(rcp_hi) + .addReg(rcp_plus_e) + .addReg(rcp_sub_e); + + unsigned q = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULHI_UINT()), q) + .addReg(tmp0) + .addOperand(numerator); + + /* num_sub_r = q * denominator */ + unsigned num_sub_r = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULLO_UINT()), + num_sub_r) + .addReg(q) + .addOperand(denominator); + + unsigned r = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), r) + .addOperand(numerator) + .addReg(num_sub_r); + + unsigned r_ge_den = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGE_INT), r_ge_den) + .addReg(r) + .addOperand(denominator); + + unsigned r_ge_zero = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGE_INT), r_ge_zero) + .addOperand(numerator) + .addReg(num_sub_r); + + unsigned tmp1 = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::AND_INT), tmp1) + .addReg(r_ge_den) + .addReg(r_ge_zero); + + unsigned val0 = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + unsigned val1 = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + unsigned result = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); + if (div) { + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::ADD_INT), val0) + .addReg(q) + .addReg(AMDIL::ONE_INT); + + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), val1) + .addReg(q) + .addReg(AMDIL::ONE_INT); + + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), result) + .addReg(tmp1) + .addReg(q) + .addReg(val0); + } else { + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), val0) + .addReg(r) + .addOperand(denominator); + + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::ADD_INT), val1) + .addReg(r) + .addOperand(denominator); + + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), result) + .addReg(tmp1) + .addReg(r) + .addReg(val0); + } + + /* XXX: Do we need to set to MAX_INT if denominator is 0? */ + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), dst) + .addReg(r_ge_zero) + .addReg(val1) + .addReg(result); +} diff --git a/lib/Target/AMDIL/R600LowerShaderInstructions.cpp b/lib/Target/AMDIL/R600LowerShaderInstructions.cpp new file mode 100644 index 00000000000..394ee7006ce --- /dev/null +++ b/lib/Target/AMDIL/R600LowerShaderInstructions.cpp @@ -0,0 +1,143 @@ +//===-- R600LowerShaderInstructions.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
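
The divMod() helper just above (adapted from tgsi_divmod() in r600_shader.c) builds an unsigned divide out of a hardware reciprocal: RECIP_UINT approximates 2^32/d, one multiply-high round estimates the approximation error, and a final conditional chain corrects the quotient by at most one in either direction. Below is a host-side model of the same arithmetic, useful for convincing yourself the CNDE_INT select chain is right. This is a reference sketch assuming RECIP_UINT ≈ floor(2^32/d), not code from the patch:

  #include <cstdint>

  static void refDivMod(uint32_t n, uint32_t d, uint32_t &q, uint32_t &r) {
    uint32_t rcp = (uint32_t)((uint64_t(1) << 32) / d);   /* RECIP_UINT */
    uint32_t rcp_lo = (uint32_t)((uint64_t)rcp * d);
    uint32_t rcp_hi = (uint32_t)(((uint64_t)rcp * d) >> 32);
    /* CNDE_INT selects its first operand when the condition is zero */
    uint32_t abs_rcp_lo = (rcp_hi == 0) ? (uint32_t)0 - rcp_lo : rcp_lo;
    uint32_t e = (uint32_t)(((uint64_t)abs_rcp_lo * rcp) >> 32);
    uint32_t tmp0 = (rcp_hi == 0) ? rcp + e : rcp - e;
    q = (uint32_t)(((uint64_t)tmp0 * n) >> 32);

    /* Final correction: the estimate can be off by one either way. */
    uint32_t num_sub_r = q * d;
    bool r_nonneg = n >= num_sub_r;        /* SETGE(numerator, q * d) */
    bool r_ge_den = (n - num_sub_r) >= d;  /* SETGE(r, denominator)  */
    if (!r_nonneg)
      q -= 1;                              /* overshot: take q - 1 */
    else if (r_ge_den)
      q += 1;                              /* undershot: take q + 1 */
    r = n - q * d;
  }

  /* e.g. refDivMod(100, 7, q, r) yields q == 14, r == 2 */
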
+// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPULowerShaderInstructions.h" +#include "AMDIL.h" +#include "AMDILInstrInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +namespace { + class R600LowerShaderInstructionsPass : public MachineFunctionPass, + public AMDGPULowerShaderInstructionsPass { + + private: + static char ID; + TargetMachine &TM; + + void lowerEXPORT_REG_FAKE(MachineInstr &MI, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I); + void lowerLOAD_INPUT(MachineInstr & MI); + bool lowerSTORE_OUTPUT(MachineInstr & MI, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I); + + public: + R600LowerShaderInstructionsPass(TargetMachine &tm) : + MachineFunctionPass(ID), TM(tm) { } + + bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { return "R600 Lower Shader Instructions"; } + }; +} /* End anonymous namespace */ + +char R600LowerShaderInstructionsPass::ID = 0; + +FunctionPass *llvm::createR600LowerShaderInstructionsPass(TargetMachine &tm) { + return new R600LowerShaderInstructionsPass(tm); +} + +#define INSTR_CASE_FLOAT_V(inst) \ + case AMDIL:: inst##_v4f32: \ + +#define INSTR_CASE_FLOAT_S(inst) \ + case AMDIL:: inst##_f32: + +#define INSTR_CASE_FLOAT(inst) \ + INSTR_CASE_FLOAT_V(inst) \ + INSTR_CASE_FLOAT_S(inst) +bool R600LowerShaderInstructionsPass::runOnMachineFunction(MachineFunction &MF) +{ + MRI = &MF.getRegInfo(); + + + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); + BB != BB_E; ++BB) { + MachineBasicBlock &MBB = *BB; + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) { + MachineInstr &MI = *I; + bool deleteInstr = false; + switch (MI.getOpcode()) { + + default: break; + + case AMDIL::RESERVE_REG: + case AMDIL::EXPORT_REG: + deleteInstr = true; + break; + + case AMDIL::LOAD_INPUT: + lowerLOAD_INPUT(MI); + deleteInstr = true; + break; + + case AMDIL::STORE_OUTPUT: + deleteInstr = lowerSTORE_OUTPUT(MI, MBB, I); + break; + + } + + ++I; + + if (deleteInstr) { + MI.eraseFromParent(); + } + } + } + + return false; +} + +/* The goal of this function is to replace the virutal destination register of + * a LOAD_INPUT instruction with the correct physical register that will. + * + * XXX: I don't think this is the right way things assign physical registers, + * but I'm not sure of another way to do this. 
+ */ +void R600LowerShaderInstructionsPass::lowerLOAD_INPUT(MachineInstr &MI) +{ + MachineOperand &dst = MI.getOperand(0); + MachineOperand &arg = MI.getOperand(1); + int64_t inputIndex = arg.getImm(); + const TargetRegisterClass * inputClass = TM.getRegisterInfo()->getRegClass(AMDIL::R600_TReg32RegClassID); + unsigned newRegister = inputClass->getRegister(inputIndex); + unsigned dstReg = dst.getReg(); + + preloadRegister(MI.getParent()->getParent(), TM.getInstrInfo(), newRegister, + dstReg); +} + +bool R600LowerShaderInstructionsPass::lowerSTORE_OUTPUT(MachineInstr &MI, + MachineBasicBlock &MBB, MachineBasicBlock::iterator I) +{ + MachineOperand &valueOp = MI.getOperand(1); + MachineOperand &indexOp = MI.getOperand(2); + unsigned valueReg = valueOp.getReg(); + int64_t outputIndex = indexOp.getImm(); + const TargetRegisterClass * outputClass = TM.getRegisterInfo()->getRegClass(AMDIL::R600_TReg32RegClassID); + unsigned newRegister = outputClass->getRegister(outputIndex); + + BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::COPY), + newRegister) + .addReg(valueReg); + + if (!MRI->isLiveOut(newRegister)) + MRI->addLiveOut(newRegister); + + return true; + +} diff --git a/lib/Target/AMDIL/R600OpenCLUtils.h b/lib/Target/AMDIL/R600OpenCLUtils.h new file mode 100644 index 00000000000..91e41d63d0d --- /dev/null +++ b/lib/Target/AMDIL/R600OpenCLUtils.h @@ -0,0 +1,49 @@ +//===-- OpenCLUtils.h - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// +#ifndef OPENCLUTILS_H +#define OPENCLUTILS_H + +#include "llvm/Function.h" + +#include + +static bool isOpenCLKernel(const llvm::Function* fun) +{ + llvm::Module *mod = const_cast(fun)->getParent(); + llvm::NamedMDNode * md = mod->getOrInsertNamedMetadata("opencl.kernels"); + + if (!md or !md->getNumOperands()) + { + return false; + } + + for (int i = 0; i < int(md->getNumOperands()); i++) + { + if (!md->getOperand(i) or !md->getOperand(i)->getOperand(0)) + { + continue; + } + + assert(md->getOperand(i)->getNumOperands() == 1); + + if (md->getOperand(i)->getOperand(0)->getName() == fun->getName()) + { + return true; + } + } + + return false; +} + + +#endif diff --git a/lib/Target/AMDIL/SIAssignInterpRegs.cpp b/lib/Target/AMDIL/SIAssignInterpRegs.cpp new file mode 100644 index 00000000000..6fe29c6a3fe --- /dev/null +++ b/lib/Target/AMDIL/SIAssignInterpRegs.cpp @@ -0,0 +1,110 @@ +//===-- SIAssignInterpRegs.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
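
isOpenCLKernel() in R600OpenCLUtils.h above recognizes kernels by scanning the module-level `opencl.kernels` named metadata, where each operand is an MDNode whose first operand is the kernel function. The same test can be written as a direct pointer comparison, which also avoids the side effect of getOrInsertNamedMetadata() creating the node when it is absent. A sketch against the LLVM 3.x-era API this patch targets:

  #include "llvm/Function.h"
  #include "llvm/Metadata.h"
  #include "llvm/Module.h"

  static bool isKernelByPointer(const llvm::Function *F) {
    const llvm::Module *M = F->getParent();
    const llvm::NamedMDNode *MD = M->getNamedMetadata("opencl.kernels");
    if (!MD)
      return false;
    for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
      const llvm::MDNode *Node = MD->getOperand(i);
      /* Operand 0 of each entry is the kernel Function itself. */
      if (Node && Node->getNumOperands() > 0 && Node->getOperand(0) == F)
        return true;
    }
    return false;
  }
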
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+
+
+#include "AMDGPU.h"
+#include "AMDGPUUtil.h"
+#include "AMDIL.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+  class SIAssignInterpRegsPass : public MachineFunctionPass {
+
+  private:
+    static char ID;
+    TargetMachine &TM;
+
+  public:
+    SIAssignInterpRegsPass(TargetMachine &tm) :
+      MachineFunctionPass(ID), TM(tm) { }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    const char *getPassName() const { return "SI Assign interpolation registers"; }
+  };
+} // End anonymous namespace
+
+char SIAssignInterpRegsPass::ID = 0;
+
+#define INTERP_VALUES 16
+
+struct interp_info {
+  bool enabled;
+  unsigned regs[3];
+  unsigned reg_count;
+};
+
+
+FunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm) {
+  return new SIAssignInterpRegsPass(tm);
+}
+
+bool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF)
+{
+
+  struct interp_info InterpUse[INTERP_VALUES] = {
+    {false, {AMDIL::PERSP_SAMPLE_I, AMDIL::PERSP_SAMPLE_J}, 2},
+    {false, {AMDIL::PERSP_CENTER_I, AMDIL::PERSP_CENTER_J}, 2},
+    {false, {AMDIL::PERSP_CENTROID_I, AMDIL::PERSP_CENTROID_J}, 2},
+    {false, {AMDIL::PERSP_I_W, AMDIL::PERSP_J_W, AMDIL::PERSP_1_W}, 3},
+    {false, {AMDIL::LINEAR_SAMPLE_I, AMDIL::LINEAR_SAMPLE_J}, 2},
+    {false, {AMDIL::LINEAR_CENTER_I, AMDIL::LINEAR_CENTER_J}, 2},
+    {false, {AMDIL::LINEAR_CENTROID_I, AMDIL::LINEAR_CENTROID_J}, 2},
+    {false, {AMDIL::LINE_STIPPLE_TEX_COORD}, 1},
+    {false, {AMDIL::POS_X_FLOAT}, 1},
+    {false, {AMDIL::POS_Y_FLOAT}, 1},
+    {false, {AMDIL::POS_Z_FLOAT}, 1},
+    {false, {AMDIL::POS_W_FLOAT}, 1},
+    {false, {AMDIL::FRONT_FACE}, 1},
+    {false, {AMDIL::ANCILLARY}, 1},
+    {false, {AMDIL::SAMPLE_COVERAGE}, 1},
+    {false, {AMDIL::POS_FIXED_PT}, 1}
+  };
+
+  SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  /* First pass, mark the interpolation values that are used.  A value is
+   * needed if any one of its component registers has a use. */
+  for (unsigned interp_idx = 0; interp_idx < INTERP_VALUES; interp_idx++) {
+    for (unsigned reg_idx = 0; reg_idx < InterpUse[interp_idx].reg_count;
+         reg_idx++) {
+      if (!MRI.use_empty(InterpUse[interp_idx].regs[reg_idx])) {
+        InterpUse[interp_idx].enabled = true;
+      }
+    }
+  }
+
+  unsigned used_vgprs = 0;
+
+  /* Second pass, replace with VGPRs. */
+  for (unsigned interp_idx = 0; interp_idx < INTERP_VALUES; interp_idx++) {
+    if (!InterpUse[interp_idx].enabled) {
+      continue;
+    }
+    MFI->spi_ps_input_addr |= (1 << interp_idx);
+
+    for (unsigned reg_idx = 0; reg_idx < InterpUse[interp_idx].reg_count;
+         reg_idx++, used_vgprs++) {
+      unsigned new_reg = AMDIL::VReg_32RegClass.getRegister(used_vgprs);
+      unsigned virt_reg = MRI.createVirtualRegister(&AMDIL::VReg_32RegClass);
+      MRI.replaceRegWith(InterpUse[interp_idx].regs[reg_idx], virt_reg);
+      AMDGPU::utilAddLiveIn(&MF, MRI, TM.getInstrInfo(), new_reg, virt_reg);
+    }
+  }
+
+  return false;
+}
diff --git a/lib/Target/AMDIL/SICodeEmitter.cpp b/lib/Target/AMDIL/SICodeEmitter.cpp
new file mode 100644
index 00000000000..ad494fae7c6
--- /dev/null
+++ b/lib/Target/AMDIL/SICodeEmitter.cpp
@@ -0,0 +1,274 @@
+//===-- SICodeEmitter.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "AMDGPU.h"
+#include "AMDGPUUtil.h"
+#include "AMDILCodeEmitter.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <stdio.h>
+
+#define LITERAL_REG 255
+#define VGPR_BIT(src_idx) (1ULL << (9 * src_idx - 1))
+using namespace llvm;
+
+namespace {
+
+  class SICodeEmitter : public MachineFunctionPass, public AMDILCodeEmitter {
+
+  private:
+    static char ID;
+    formatted_raw_ostream &_OS;
+    const TargetMachine *TM;
+    void emitState(MachineFunction & MF);
+    void emitInstr(MachineInstr &MI);
+
+    void outputBytes(uint64_t value, unsigned bytes);
+    unsigned GPRAlign(const MachineInstr &MI, unsigned OpNo, unsigned shift)
+        const;
+
+  public:
+    SICodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
+        _OS(OS), TM(NULL) { }
+    const char *getPassName() const { return "SI Code Emitter"; }
+    bool runOnMachineFunction(MachineFunction &MF);
+    virtual uint64_t getMachineOpValue(const MachineInstr &MI,
+                                       const MachineOperand &MO) const;
+    virtual unsigned GPR4AlignEncode(const MachineInstr &MI, unsigned OpNo)
+        const;
+    virtual unsigned GPR2AlignEncode(const MachineInstr &MI, unsigned OpNo)
+        const;
+    virtual uint64_t i32LiteralEncode(const MachineInstr &MI, unsigned OpNo)
+        const;
+    virtual uint64_t VOPPostEncode(const MachineInstr &MI,
+                                   uint64_t Value) const;
+  };
+}
+
+char SICodeEmitter::ID = 0;
+
+FunctionPass *llvm::createSICodeEmitterPass(formatted_raw_ostream &OS) {
+  return new SICodeEmitter(OS);
+}
+
+void SICodeEmitter::emitState(MachineFunction & MF)
+{
+  unsigned maxSGPR = 0;
+  unsigned maxVGPR = 0;
+  bool VCCUsed = false;
+  const SIRegisterInfo * RI =
+      static_cast<const SIRegisterInfo*>(TM->getRegisterInfo());
+  SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+       BB != BB_E; ++BB) {
+    MachineBasicBlock &MBB = *BB;
+    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+         I != E; ++I) {
+      MachineInstr &MI = *I;
+      unsigned numOperands = MI.getNumOperands();
+      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
+        MachineOperand & MO = MI.getOperand(op_idx);
+        unsigned maxUsed;
+        unsigned width = 0;
+        bool isSGPR = false;
+        unsigned reg;
+        unsigned hwReg;
+        if (!MO.isReg()) {
+          continue;
+        }
+        reg = MO.getReg();
+        if (reg == AMDIL::VCC) {
+          VCCUsed = true;
+          continue;
+        }
+        if (AMDIL::SReg_32RegClass.contains(reg)) {
+          isSGPR = true;
+          width = 1;
+        } else if (AMDIL::VReg_32RegClass.contains(reg)) {
+          isSGPR = false;
+          width = 1;
+        } else if (AMDIL::SReg_64RegClass.contains(reg)) {
+          isSGPR = true;
+          width = 2;
+        } else if (AMDIL::VReg_64RegClass.contains(reg)) {
+          isSGPR = false;
+          width = 2;
+        } else if (AMDIL::SReg_128RegClass.contains(reg)) {
+          isSGPR = true;
+          width = 4;
+        } else if (AMDIL::VReg_128RegClass.contains(reg)) {
+          isSGPR = false;
+          width = 4;
+        } else if (AMDIL::SReg_256RegClass.contains(reg)) {
+          isSGPR = true;
+          width = 8;
+        } else {
+          assert(!"Unknown register class");
+        }
+        hwReg = RI->getHWRegNum(reg);
+        maxUsed = ((hwReg + 1) * width) - 1;
+        if (isSGPR) {
+          maxSGPR = maxUsed > maxSGPR ? maxUsed : maxSGPR;
+        } else {
+          maxVGPR = maxUsed > maxVGPR ?
maxUsed : maxVGPR; + } + } + } + } + if (VCCUsed) { + maxSGPR += 2; + } + outputBytes(maxSGPR + 1, 4); + outputBytes(maxVGPR + 1, 4); + outputBytes(MFI->spi_ps_input_addr, 4); +} + +bool SICodeEmitter::runOnMachineFunction(MachineFunction &MF) +{ + MF.dump(); + TM = &MF.getTarget(); + const AMDGPUInstrInfo * TII = + static_cast(TM->getInstrInfo()); + + emitState(MF); + + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); + BB != BB_E; ++BB) { + MachineBasicBlock &MBB = *BB; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) { + MachineInstr &MI = *I; + if (!TII->isRegPreload(MI) && MI.getOpcode() != AMDIL::KILL + && MI.getOpcode() != AMDIL::RETURN) { + emitInstr(MI); + } + } + } + return false; +} + +void SICodeEmitter::emitInstr(MachineInstr &MI) +{ + const SIInstrInfo * SII = static_cast(TM->getInstrInfo()); + + uint64_t hwInst = getBinaryCodeForInstr(MI); + + if ((hwInst & 0xffffffff) == 0xffffffff) { + fprintf(stderr, "Unsupported Instruction: \n"); + MI.dump(); + abort(); + } + +// hwInst |= SII->getBinaryCode(MI); + + unsigned bytes = SII->getEncodingBytes(MI); + outputBytes(hwInst, bytes); +} + +uint64_t SICodeEmitter::getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const +{ + const SIRegisterInfo * RI = + static_cast(TM->getRegisterInfo()); + + switch(MO.getType()) { + case MachineOperand::MO_Register: + return RI->getBinaryCode(MO.getReg()); + + case MachineOperand::MO_Immediate: + return MO.getImm(); + + case MachineOperand::MO_FPImmediate: + /* XXX: Not all instructions can use inline literals */ + /* XXX: We should make sure this is a 32-bit constant */ + return LITERAL_REG | (MO.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue() << 32); + default: + llvm_unreachable("Encoding of this operand type is not supported yet."); + break; + } +} + +unsigned SICodeEmitter::GPRAlign(const MachineInstr &MI, unsigned OpNo, + unsigned shift) const +{ + const SIRegisterInfo * RI = + static_cast(TM->getRegisterInfo()); + unsigned regCode = RI->getHWRegNum(MI.getOperand(OpNo).getReg()); + return regCode >> shift; +} + +unsigned SICodeEmitter::GPR4AlignEncode(const MachineInstr &MI, + unsigned OpNo) const +{ + return GPRAlign(MI, OpNo, 2); +} + +unsigned SICodeEmitter::GPR2AlignEncode(const MachineInstr &MI, + unsigned OpNo) const +{ + return GPRAlign(MI, OpNo, 1); +} + +uint64_t SICodeEmitter::i32LiteralEncode(const MachineInstr &MI, + unsigned OpNo) const +{ + return LITERAL_REG | (MI.getOperand(OpNo).getImm() << 32); +} + +/* Set the "VGPR" bit for VOP args that can take either a VGPR or a SGPR. + * XXX: It would be nice if we could handle this without a PostEncode function. + */ +uint64_t SICodeEmitter::VOPPostEncode(const MachineInstr &MI, + uint64_t Value) const +{ + const SIInstrInfo * SII = static_cast(TM->getInstrInfo()); + unsigned encodingType = SII->getEncodingType(MI); + unsigned numSrcOps; + unsigned vgprBitOffset; + + if (encodingType == SIInstrEncodingType::VOP3) { + numSrcOps = 3; + vgprBitOffset = 32; + } else { + numSrcOps = 1; + vgprBitOffset = 0; + } + + /* Add one to skip over the destination reg operand. 
*/ + for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) { + if (!MI.getOperand(opIdx).isReg()) { + continue; + } + unsigned reg = MI.getOperand(opIdx).getReg(); + if (AMDIL::VReg_32RegClass.contains(reg) + || AMDIL::VReg_64RegClass.contains(reg)) { + Value |= (VGPR_BIT(opIdx)) << vgprBitOffset; + } + } + return Value; +} + + +void SICodeEmitter::outputBytes(uint64_t value, unsigned bytes) +{ + for (unsigned i = 0; i < bytes; i++) { + _OS.write((uint8_t) ((value >> (8 * i)) & 0xff)); + } +} diff --git a/lib/Target/AMDIL/SILowerShaderInstructions.cpp b/lib/Target/AMDIL/SILowerShaderInstructions.cpp new file mode 100644 index 00000000000..5d49d88dc7c --- /dev/null +++ b/lib/Target/AMDIL/SILowerShaderInstructions.cpp @@ -0,0 +1,90 @@ +//===-- SILowerShaderInstructions.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + + +#include "AMDGPU.h" +#include "AMDGPULowerShaderInstructions.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +namespace { + class SILowerShaderInstructionsPass : public MachineFunctionPass, + public AMDGPULowerShaderInstructionsPass { + + private: + static char ID; + TargetMachine &TM; + + public: + SILowerShaderInstructionsPass(TargetMachine &tm) : + MachineFunctionPass(ID), TM(tm) { } + + bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { return "SI Lower Shader Instructions"; } + + void lowerRETURN(MachineBasicBlock &MBB, MachineBasicBlock::iterator I); + void lowerSET_M0(MachineInstr &MI, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I); + }; +} /* End anonymous namespace */ + +char SILowerShaderInstructionsPass::ID = 0; + +FunctionPass *llvm::createSILowerShaderInstructionsPass(TargetMachine &tm) { + return new SILowerShaderInstructionsPass(tm); +} + +bool SILowerShaderInstructionsPass::runOnMachineFunction(MachineFunction &MF) +{ + MRI = &MF.getRegInfo(); + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); + BB != BB_E; ++BB) { + MachineBasicBlock &MBB = *BB; + for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); + I != MBB.end(); I = Next, Next = llvm::next(I) ) { + MachineInstr &MI = *I; + switch (MI.getOpcode()) { + case AMDIL::RETURN: + lowerRETURN(MBB, I); + break; + case AMDIL::SET_M0: + lowerSET_M0(MI, MBB, I); + break; + default: continue; + } + MI.removeFromParent(); + } + } + + return false; +} + +void SILowerShaderInstructionsPass::lowerRETURN(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) +{ + const struct TargetInstrInfo * TII = TM.getInstrInfo(); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::S_ENDPGM)); +} + +void SILowerShaderInstructionsPass::lowerSET_M0(MachineInstr &MI, + MachineBasicBlock &MBB, MachineBasicBlock::iterator I) +{ + const struct TargetInstrInfo * TII = TM.getInstrInfo(); + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::S_MOV_IMM_I32)) + .addReg(AMDIL::M0) + .addOperand(MI.getOperand(1)); +} diff --git a/lib/Target/AMDIL/SIPropagateImmReads.cpp b/lib/Target/AMDIL/SIPropagateImmReads.cpp new file mode 100644 index 00000000000..4f925d5de1c --- /dev/null 
+++ b/lib/Target/AMDIL/SIPropagateImmReads.cpp
@@ -0,0 +1,70 @@
+//===-- SIPropagateImmReads.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUUtil.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+using namespace llvm;
+
+namespace {
+  class SIPropagateImmReadsPass : public MachineFunctionPass {
+
+  private:
+    static char ID;
+    TargetMachine &TM;
+
+  public:
+    SIPropagateImmReadsPass(TargetMachine &tm) :
+      MachineFunctionPass(ID), TM(tm) { }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+  };
+} /* End anonymous namespace */
+
+char SIPropagateImmReadsPass::ID = 0;
+
+FunctionPass *llvm::createSIPropagateImmReadsPass(TargetMachine &tm) {
+  return new SIPropagateImmReadsPass(tm);
+}
+
+bool SIPropagateImmReadsPass::runOnMachineFunction(MachineFunction &MF)
+{
+  const SIInstrInfo * TII =
+      static_cast<const SIInstrInfo*>(TM.getInstrInfo());
+
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+       BB != BB_E; ++BB) {
+    MachineBasicBlock &MBB = *BB;
+    for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+         I != MBB.end(); I = Next, Next = llvm::next(I)) {
+      MachineInstr &MI = *I;
+
+      switch (MI.getOpcode()) {
+      case AMDIL::LOADCONST_f32:
+      case AMDIL::LOADCONST_i32:
+        break;
+      default:
+        continue;
+      }
+
+      /* XXX: Create and use S_MOV_IMM for SREGs */
+      BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::V_MOV_IMM))
+          .addOperand(MI.getOperand(0))
+          .addOperand(MI.getOperand(1));
+
+      MI.eraseFromParent();
+    }
+  }
+  return false;
+}
-- 
cgit v1.2.3
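
A closing note on VOPPostEncode() in SICodeEmitter.cpp: SI encodes each source operand in a 9-bit field, and an index of 256 or above selects the VGPR file, so setting bit 8 of a field (which is what VGPR_BIT computes) rebases that source onto the VGPRs. For VOP3 encodings the source fields live in the second dword, hence the extra 32-bit shift. The arithmetic spelled out (illustrative only):

  #include <cassert>
  #include <cstdint>

  #define VGPR_BIT(src_idx) (1ULL << (9 * (src_idx) - 1))

  int main() {
    assert(VGPR_BIT(1) == (1ULL << 8));   /* src0: bit 8 of field bits 0..8   */
    assert(VGPR_BIT(2) == (1ULL << 17));  /* src1: bit 17 of field bits 9..17 */
    assert(VGPR_BIT(3) == (1ULL << 26));  /* src2: bit 26 of field bits 18..26 */

    /* For VOP3 the operand fields sit in the high dword, so
     * VOPPostEncode() shifts the flag up by vgprBitOffset == 32. */
    assert((VGPR_BIT(1) << 32) == (1ULL << 40));
    return 0;
  }
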