diff options
author | Zhigang Gong <zhigang.gong@intel.com> | 2014-05-15 14:04:59 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2014-05-21 17:55:35 +0800 |
commit | 4722ef3cde032584f8295b87c470ca874162a733 (patch) | |
tree | 87f83d97062662ee9df38a03decf8881bd4c70df | |
parent | 31f28c8d9852fafdeb113a60597a5dfd60b89683 (diff) |
GBE: preparation to mix simd16 into simd8 kernel. (branch: mixsimd16)
This patch modifies the scalarize pass to collect the IR-layer
vector information and pass that information to the backend.
The backend will create two types of selection vector: one is
the general selection vector, which must be in a contiguous
register region, and the other is the IR-layer vector, which is
better placed in a contiguous register region.
Signed-off-by: Zhigang Gong <zhigang.gong@intel.com>
-rw-r--r-- | backend/src/backend/gen_insn_selection.cpp | 54 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_selection.hpp | 44 | ||||
-rw-r--r-- | backend/src/backend/gen_reg_allocation.cpp | 81 | ||||
-rw-r--r-- | backend/src/ir/function.hpp | 26 | ||||
-rw-r--r-- | backend/src/ir/unit.cpp | 1 | ||||
-rw-r--r-- | backend/src/ir/unit.hpp | 39 | ||||
-rw-r--r-- | backend/src/llvm/llvm_gen_backend.cpp | 14 | ||||
-rw-r--r-- | backend/src/llvm/llvm_gen_backend.hpp | 3 | ||||
-rw-r--r-- | backend/src/llvm/llvm_scalarize.cpp | 13 | ||||
-rw-r--r-- | backend/src/llvm/llvm_to_gen.cpp | 2 |
10 files changed, 226 insertions, 51 deletions
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 745bb199..1a0827bf 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -187,14 +187,6 @@ namespace gbe } /////////////////////////////////////////////////////////////////////////// - // SelectionVector - /////////////////////////////////////////////////////////////////////////// - - SelectionVector::SelectionVector(void) : - insn(NULL), reg(NULL), regNum(0), isSrc(0) - {} - - /////////////////////////////////////////////////////////////////////////// // SelectionBlock /////////////////////////////////////////////////////////////////////////// @@ -1058,9 +1050,13 @@ namespace gbe insn->extra.elem = bti; SelectionVector *vector = this->appendVector(); +#if 0 vector->regNum = srcNum; vector->reg = &insn->src(0); vector->isSrc = 1; +#else + vector->setVectorReg(&insn->src(0), srcNum, true); +#endif } void Selection::Opaque::EOT(void) { this->appendInsn(SEL_OP_EOT, 0, 0); } @@ -1090,6 +1086,7 @@ namespace gbe insn->extra.elem = valueNum; // Only the temporary registers need contiguous allocation +#if 0 dstVector->regNum = elemNum - valueNum; dstVector->isSrc = 0; dstVector->reg = &insn->dst(0); @@ -1098,6 +1095,10 @@ namespace gbe srcVector->regNum = 1; srcVector->isSrc = 1; srcVector->reg = &insn->src(0); +#else + dstVector->setVectorReg(&insn->dst(0), elemNum - valueNum, false); + srcVector->setVectorReg(&insn->src(0), 1, true); +#endif } void Selection::Opaque::UNTYPED_READ(Reg addr, @@ -1116,7 +1117,7 @@ namespace gbe insn->src(0) = addr; insn->extra.function = bti; insn->extra.elem = elemNum; - +#if 0 // Sends require contiguous allocation dstVector->regNum = elemNum; dstVector->isSrc = 0; @@ -1125,6 +1126,10 @@ namespace gbe srcVector->regNum = 1; srcVector->isSrc = 1; srcVector->reg = &insn->src(0); +#else + dstVector->setVectorReg(&insn->dst(0), elemNum, false); + srcVector->setVectorReg(&insn->src(0), 
1, true); +#endif } /* elemNum contains all the temporary register and the @@ -1149,9 +1154,13 @@ namespace gbe insn->extra.elem = srcNum; // Only the addr + temporary registers need to be contiguous. +#if 0 vector->regNum = dstNum; vector->reg = &insn->dst(0); vector->isSrc = 1; +#else + vector->setVectorReg(&insn->dst(0), dstNum, false); +#endif } void Selection::Opaque::UNTYPED_WRITE(Reg addr, @@ -1170,9 +1179,13 @@ namespace gbe insn->extra.elem = elemNum; // Sends require contiguous allocation for the sources +#if 0 vector->regNum = elemNum+1; vector->reg = &insn->src(0); vector->isSrc = 1; +#else + vector->setVectorReg(&insn->dst(0), elemNum + 1, true); +#endif } void Selection::Opaque::BYTE_GATHER(Reg dst, Reg addr, uint32_t elemSize, uint32_t bti) { @@ -1190,12 +1203,17 @@ namespace gbe // byte gather requires vector in the sense that scalar are not allowed // (yet) +#if 0 dstVector->regNum = 1; dstVector->isSrc = 0; dstVector->reg = &insn->dst(0); srcVector->regNum = 1; srcVector->isSrc = 1; srcVector->reg = &insn->src(0); +#else + dstVector->setVectorReg(&insn->dst(0), 1, false); + srcVector->setVectorReg(&insn->src(0), 1, true); +#endif } void Selection::Opaque::BYTE_SCATTER(Reg addr, Reg src, uint32_t elemSize, uint32_t bti) { @@ -1209,9 +1227,13 @@ namespace gbe insn->extra.elem = elemSize; // value and address are contiguous in the send +#if 0 vector->regNum = 2; vector->isSrc = 1; vector->reg = &insn->src(0); +#else + vector->setVectorReg(&insn->src(0), 2, true); +#endif } void Selection::Opaque::DWORD_GATHER(Reg dst, Reg addr, uint32_t bti) { @@ -1224,12 +1246,17 @@ namespace gbe insn->src(0) = addr; insn->dst(0) = dst; insn->extra.function = bti; +#if 0 vector->regNum = 1; vector->isSrc = 0; vector->reg = &insn->dst(0); srcVector->regNum = 1; srcVector->isSrc = 1; srcVector->reg = &insn->src(0); +#else + vector->setVectorReg(&insn->dst(0), 1, false); + srcVector->setVectorReg(&insn->src(0), 1, true); +#endif } void 
Selection::Opaque::UNPACK_BYTE(const GenRegister *dst, const GenRegister src, uint32_t elemNum) { @@ -1600,6 +1627,7 @@ namespace gbe for (uint32_t elemID = 0; elemID < msgNum; ++elemID) insn->src(elemID) = msgPayloads[elemID]; +#if 0 // Sends require contiguous allocation dstVector->regNum = dstNum; dstVector->isSrc = 0; @@ -1609,6 +1637,10 @@ namespace gbe msgVector->regNum = msgNum; msgVector->isSrc = 1; msgVector->reg = &insn->src(0); +#else + dstVector->setVectorReg(&insn->dst(0), dstNum, false); + msgVector->setVectorReg(&insn->src(0), msgNum, true); +#endif insn->extra.rdbti = bti; insn->extra.sampler = sampler; @@ -1638,10 +1670,14 @@ namespace gbe insn->extra.bti = bti; insn->extra.msglen = msgNum; insn->extra.is3DWrite = is3D; +#if 0 // Sends require contiguous allocation msgVector->regNum = msgNum; msgVector->isSrc = 1; msgVector->reg = &insn->src(0); +#else + msgVector->setVectorReg(&insn->src(0), msgNum, true); +#endif } Selection::~Selection(void) { GBE_DELETE(this->opaque); } diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp index 1f48b235..1163b0d3 100644 --- a/backend/src/backend/gen_insn_selection.hpp +++ b/backend/src/backend/gen_insn_selection.hpp @@ -149,23 +149,53 @@ namespace gbe friend class Selection; }; + // Owns the selection block + class Selection; + /*! Instructions like sends require to make registers contiguous in GRF */ class SelectionVector : public NonCopyable, public intrusive_list_node { public: - SelectionVector(void); + SelectionVector(void) : insn(NULL), reg(NULL), regNum(0), + issrc(false), isirvector(false) {} + /*! The instruction that requires the vector of registers */ SelectionInstruction *insn; - /*! Directly points to the selection instruction registers */ - GenRegister *reg; + const ir::Register getReg(uint32_t regID) const { + return isirvector ? 
irReg[regID] : reg[regID].reg(); + } + const uint32_t getNum(void) const { return regNum; } + bool isSrc(void) const { return issrc; } + bool isIRVector(void) const { return isirvector; } + + INLINE void setVectorReg(const ir::Register *ir, uint32_t num) { + irReg = ir; + regNum = num; + isirvector = true; + } + + private: + union { + /*! Directly points to the selection instruction registers, only exist when isIRVector == false */ + const GenRegister *reg; + /*! is used to represent IR vector, only exist when isIRVector == true*/ + const ir::Register *irReg; + }; + INLINE void setVectorReg(GenRegister *r, uint32_t num, bool src) { + reg = r; + regNum = num; + isirvector = false; + issrc = src; + } /*! Number of registers in the vector */ uint16_t regNum; /*! Indicate if this a destination or a source vector */ - uint16_t isSrc; - }; + bool issrc; + /*! Indicate if this is a LLVM IR layer vector. */ + bool isirvector; + friend class Selection; - // Owns the selection block - class Selection; + }; /*! A selection block is the counterpart of the IR Basic block. It contains * the instructions generated from an IR basic block diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp index f642c2e4..f5dd9381 100644 --- a/backend/src/backend/gen_reg_allocation.cpp +++ b/backend/src/backend/gen_reg_allocation.cpp @@ -154,10 +154,11 @@ namespace gbe map<ir::Register, uint32_t> RA; /*! Map offset to virtual registers. */ map<uint32_t, ir::Register> offsetReg; - /*! Provides the position of each register in a vector */ + /*! Provides the position of each register in a selection vector */ map<ir::Register, VectorLocation> vectorMap; /*! All vectors used in the selection */ vector<SelectionVector*> vectors; + vector<vector<ir::Register>*> irVectors; /*! The set of booleans that will go to GRF (cannot be kept into flags) */ set<ir::Register> grfBooleans; /*! 
The set of booleans which be held in flags, don't need to allocate grf */ @@ -265,7 +266,7 @@ namespace gbe } bool GenRegAllocator::Opaque::isAllocated(const SelectionVector *vector) const { - const ir::Register first = vector->reg[0].reg(); + const ir::Register first = vector->getReg(0); const auto it = vectorMap.find(first); // If the first register is not allocated we are done @@ -276,15 +277,15 @@ namespace gbe // still registers to allocate const SelectionVector *other = it->second.first; const uint32_t otherFirst = it->second.second; - const uint32_t leftNum = other->regNum - otherFirst; - if (leftNum < vector->regNum) + const uint32_t leftNum = other->getNum() - otherFirst; + if (leftNum < vector->getNum()) return false; // Now check that all the registers in the already allocated vector match // the current vector - for (uint32_t regID = 1; regID < vector->regNum; ++regID) { - const ir::Register from = vector->reg[regID].reg(); - const ir::Register to = other->reg[regID + otherFirst].reg(); + for (uint32_t regID = 1; regID < vector->getNum(); ++regID) { + const ir::Register from = vector->getReg(regID); + const ir::Register to = other->getReg(regID + otherFirst); if (from != to) return false; } @@ -292,8 +293,8 @@ namespace gbe } void GenRegAllocator::Opaque::coalesce(Selection &selection, SelectionVector *vector) { - for (uint32_t regID = 0; regID < vector->regNum; ++regID) { - const ir::Register reg = vector->reg[regID].reg(); + for (uint32_t regID = 0; regID < vector->getNum(); ++regID) { + const ir::Register reg = vector->getReg(regID); const auto it = this->vectorMap.find(reg); // case 1: the register is not already in a vector, so it can stay in this // vector. 
Note that local IDs are *non-scalar* special registers but will @@ -315,21 +316,43 @@ namespace gbe // and the order is maintained, we can reuse the previous vector and avoid // the MOVs else { - ir::Register tmp; - tmp = this->replaceReg(selection, vector->insn, regID, vector->isSrc); - const VectorLocation location = std::make_pair(vector, regID); - this->vectorMap.insert(std::make_pair(tmp, location)); + if (!vector->isIRVector()) { + ir::Register tmp; + tmp = this->replaceReg(selection, vector->insn, regID, vector->isSrc()); + const VectorLocation location = std::make_pair(vector, regID); + this->vectorMap.insert(std::make_pair(tmp, location)); + } } } } /*! Will sort vector in decreasing order */ inline bool cmp(const SelectionVector *v0, const SelectionVector *v1) { - return v0->regNum > v1->regNum; + return v0->getNum() > v1->getNum(); } void GenRegAllocator::Opaque::allocateVector(Selection &selection) { - const uint32_t vectorNum = selection.getVectorNum(); + + // First we collect all the IR layer vector to a temporary array. + const ir::IRVectorMap *irVectorMap = ctx.getFunction().getIRVectorMap(); + vector<SelectionVector *> SelIRVectors; + if (ctx.getSimdWidth() == 8) { + for (auto &it : *irVectorMap) { + uint32_t i = 0; + const ir::IRVector *iv = &it.second; + while(i < iv->regNum) { + // Collect a 2 elements vector is enough for the mix simd16 optimization. + if (iv->regNum - i >= 2) { + SelectionVector *sv = new SelectionVector(); + sv->setVectorReg(&(iv->regs[i]), 2); + SelIRVectors.push_back(sv); + i += 2; + } else + break; + } + } + } + const uint32_t vectorNum = selection.getVectorNum() + SelIRVectors.size(); this->vectors.resize(vectorNum); // First we find and store all vectors @@ -337,6 +360,10 @@ namespace gbe for (auto &block : *selection.blockList) for (auto &v : block.vectorList) this->vectors[vectorID++] = &v; + // add ir vectors into the array. 
+ for (auto &v : SelIRVectors) + this->vectors[vectorID++] = v; + GBE_ASSERT(vectorID == vectorNum); // Heuristic (really simple...): sort them by the number of registers they @@ -670,25 +697,25 @@ namespace gbe if (it != vectorMap.end()) { const SelectionVector *vector = it->second.first; // all the reg in the SelectionVector are spilled - if(spilledRegs.find(vector->reg[0].reg()) + if(spilledRegs.find(vector->getReg(0)) != spilledRegs.end()) continue; uint32_t alignment; ir::RegisterFamily family; getRegAttrib(reg, alignment, &family); - const uint32_t size = vector->regNum * alignment; + const uint32_t size = vector->getNum() * alignment; const uint32_t grfOffset = allocateReg(interval, size, alignment); if(grfOffset == 0) { GBE_ASSERT(!(reservedReg && family != ir::FAMILY_DWORD)); - for(int i = vector->regNum-1; i >= 0; i--) { - if (!spillReg(vector->reg[i].reg())) + for(int i = vector->getNum()-1; i >= 0; i--) { + if (!spillReg(vector->getReg(i))) return false; } continue; } - for (uint32_t regID = 0; regID < vector->regNum; ++regID) { - const ir::Register reg = vector->reg[regID].reg(); + for (uint32_t regID = 0; regID < vector->getNum(); ++regID) { + const ir::Register reg = vector->getReg(regID); GBE_ASSERT(RA.contains(reg) == false && ctx.sel->getRegisterData(reg).family == family); insertNewReg(reg, grfOffset + alignment * regID, true); @@ -837,8 +864,8 @@ namespace gbe // If a partial of a vector is expired, the vector will be unspillable, currently. // FIXME we may need to fix those unspillable vector in the furture. 
INLINE bool GenRegAllocator::Opaque::vectorCanSpill(SelectionVector *vector) { - for(uint32_t id = 0; id < vector->regNum; id++) - if (spillCandidate.find(intervals[(ir::Register)(vector->reg[id].value.reg)]) + for(uint32_t id = 0; id < vector->getNum(); id++) + if (spillCandidate.find(intervals[(ir::Register)(vector->getReg(id))]) == spillCandidate.end()) return false; return true; @@ -872,11 +899,11 @@ namespace gbe if (isVector && (vectorCanSpill(vectorIt->second.first))) { const SelectionVector *vector = vectorIt->second.first; - for (uint32_t id = 0; id < vector->regNum; id++) { - GBE_ASSERT(spilledRegs.find(vector->reg[id].reg()) + for (uint32_t id = 0; id < vector->getNum(); id++) { + GBE_ASSERT(spilledRegs.find(vector->getReg(id)) == spilledRegs.end()); - spillSet.insert(vector->reg[id].reg()); - reg = vector->reg[id].reg(); + reg = vector->getReg(id); + spillSet.insert(reg); family = ctx.sel->getRegisterFamily(reg); size -= family == ir::FAMILY_QWORD ? 2 * GEN_REG_SIZE * ctx.getSimdWidth()/8 : GEN_REG_SIZE * ctx.getSimdWidth()/8; diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp index 266e6526..5fa95266 100644 --- a/backend/src/ir/function.hpp +++ b/backend/src/ir/function.hpp @@ -147,6 +147,14 @@ namespace ir { GBE_STRUCT(Loop); }; + /*! Map of all IR vector. */ + typedef struct IRVector { + IRVector() : regNum(0) { for(uint32_t i = 0; i < 16; i++) regs[i] = (Register) -1; } + uint32_t regNum; + Register regs[16]; + } IRVector; + typedef map<const void *, IRVector> IRVectorMap; + /*! A function is : * - a register file * - a set of basic block layout into a CGF @@ -340,6 +348,22 @@ namespace ir { /*! add the loop info for later liveness analysis */ void addLoop(const vector<LabelIndex> &bbs, const vector<std::pair<LabelIndex, LabelIndex>> &exits); INLINE const vector<Loop * > &getLoops() { return loops; } + /* Get reg vectors which indicate which registers are in a logical vector. 
*/ + INLINE const IRVectorMap *getIRVectorMap(void) const { return &irVectorMap; } + INLINE void insertIRVectorElement(const void *vectorValue, uint32_t id, Register reg) { + auto it = irVectorMap.find(vectorValue); + if (it != irVectorMap.end()) { + GBE_ASSERT(it->second.regs[id] == (Register) -1); + it->second.regs[id] = reg; + it->second.regNum++; + } else { + IRVector ir; + ir.regNum = 1; + ir.regs[id] = reg; + irVectorMap.insert(std::make_pair(vectorValue, ir)); + } + } + private: friend class Context; //!< Can freely modify a function std::string name; //!< Function name @@ -350,6 +374,8 @@ namespace ir { vector<Immediate> immediates; //!< All immediate values in the function vector<BasicBlock*> blocks; //!< All chained basic blocks vector<Loop *> loops; //!< Loops info of the function + IRVectorMap irVectorMap; //!< IR vectors map + vector<vector<Register>> IRVectors; //!< IR vectors RegisterFile file; //!< RegisterDatas used by the instructions Profile profile; //!< Current function profile PushMap pushMap; //!< Pushed function arguments (reg->loc) diff --git a/backend/src/ir/unit.cpp b/backend/src/ir/unit.cpp index 4f9d7400..0718a830 100644 --- a/backend/src/ir/unit.cpp +++ b/backend/src/ir/unit.cpp @@ -30,6 +30,7 @@ namespace ir { Unit::Unit(PointerSize pointerSize) : pointerSize(pointerSize), valid(true) {} Unit::~Unit(void) { for (const auto &pair : functions) GBE_DELETE(pair.second); + for (const auto it : vectorMaps) delete it.second; } Function *Unit::getFunction(const std::string &name) const { auto it = functions.find(name); diff --git a/backend/src/ir/unit.hpp b/backend/src/ir/unit.hpp index adebd3f3..94db4be2 100644 --- a/backend/src/ir/unit.hpp +++ b/backend/src/ir/unit.hpp @@ -42,6 +42,13 @@ namespace ir { { public: typedef hash_map<std::string, Function*> FunctionSet; + typedef struct VectorIndex{ + VectorIndex(const void *v, uint32_t id) : vectorValue(v), id(id) {} + const void *vectorValue; + uint32_t id; + } VectorIndex; + typedef map<const 
void *, VectorIndex> VectorMap; //!< a heuristic for mix simd16 optimization. + /*! Create an empty unit */ Unit(PointerSize pointerSize = POINTER_32_BITS); /*! Release everything (*including* the function pointers) */ @@ -74,8 +81,40 @@ namespace ir { const ConstantSet& getConstantSet(void) const { return constantSet; } void setValid(bool value) { valid = value; } bool getValid() { return valid; } + /*! set curr llvm function, for scalarize and gen pass. */ + void setCurrLLVMFunction(void *f) { function = f; } + /*! insert a new vector element. */ + void insertVectorElement(const void *vectorValue, const void *value, int id) { + GBE_ASSERT(function != NULL); + auto it = vectorMaps.find(function); + VectorMap *vectorMap; + if (it != vectorMaps.end()) + vectorMap = it->second; + else { + vectorMap = new VectorMap(); + vectorMaps.insert(std::make_pair(function, vectorMap)); + } + VectorIndex vi(vectorValue, id); + vectorMap->insert(std::make_pair(value, vi)); + } + /*! get a value's vector index information. */ + const VectorIndex *getVectorIndex(void *valueKey) { + auto it = vectorMaps.find(function); + if (it == vectorMaps.end()) + return NULL; + auto vectorMap = it->second; + auto vi = vectorMap->find(valueKey); + return vi != vectorMap->end() ? &vi->second : NULL; + } + void clearVectorMap(void) { + auto it = vectorMaps.find(function); + if (it != vectorMaps.end()) + it->second->clear(); + } private: friend class ContextInterface; //!< Can free modify the unit + const void * function; //!< current llvm function. 
+ map<const void *, map<const void *, VectorIndex>*> vectorMaps; hash_map<std::string, Function*> functions; //!< All the defined functions ConstantSet constantSet; //!< All the constants defined in the unit PointerSize pointerSize; //!< Size shared by all pointers diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 82429d04..fbd125af 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -483,6 +483,7 @@ namespace gbe // definitions outside the translation unit. if (F.hasAvailableExternallyLinkage()) return false; + unit.setCurrLLVMFunction(&F); // As we inline all function calls, so skip non-kernel functions bool bKernel = isKernelFunction(F); @@ -875,14 +876,21 @@ namespace gbe case Type::FloatTyID: case Type::DoubleTyID: case Type::PointerTyID: - regTranslator.newScalar(value, key, 0, uniform); + { + auto reg = regTranslator.newScalar(value, key, 0, uniform); + auto vi = unit.getVectorIndex(key == NULL ? 
value : key); + if (vi != NULL) + this->ctx.getFunction().insertIRVectorElement(vi->vectorValue, vi->id, reg); break; + } case Type::VectorTyID: { auto vectorType = cast<VectorType>(type); const uint32_t elemNum = vectorType->getNumElements(); - for (uint32_t elemID = 0; elemID < elemNum; ++elemID) - regTranslator.newScalar(value, key, elemID, uniform); + for (uint32_t elemID = 0; elemID < elemNum; ++elemID) { + auto reg = regTranslator.newScalar(value, key, elemID, uniform); + this->ctx.getFunction().insertIRVectorElement(value, elemID, reg); + } break; } default: NOT_SUPPORTED; diff --git a/backend/src/llvm/llvm_gen_backend.hpp b/backend/src/llvm/llvm_gen_backend.hpp index 26323a3e..80c2a5ff 100644 --- a/backend/src/llvm/llvm_gen_backend.hpp +++ b/backend/src/llvm/llvm_gen_backend.hpp @@ -30,6 +30,7 @@ #include "sys/platform.hpp" #include "sys/map.hpp" #include "sys/hash_map.hpp" +#include "ir/unit.hpp" #include <algorithm> // LLVM Type @@ -88,7 +89,7 @@ namespace gbe llvm::BasicBlockPass *createLoadStoreOptimizationPass(); /*! Scalarize all vector op instructions */ - llvm::FunctionPass* createScalarizePass(); + llvm::FunctionPass* createScalarizePass(ir::Unit * unit = NULL); /*! Remove/add NoDuplicate function attribute for barrier functions. 
*/ llvm::ModulePass* createBarrierNodupPass(bool); diff --git a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp index 73817e2d..70dddffa 100644 --- a/backend/src/llvm/llvm_scalarize.cpp +++ b/backend/src/llvm/llvm_scalarize.cpp @@ -93,6 +93,7 @@ #include "llvm/llvm_gen_backend.hpp" #include "sys/map.hpp" +#include "ir/unit.hpp" using namespace llvm; @@ -124,7 +125,7 @@ namespace gbe { // Standard pass stuff static char ID; - Scalarize() : FunctionPass(ID) + Scalarize(ir::Unit *unit = NULL) : FunctionPass(ID), unit(unit) { initializeLoopInfoPass(*PassRegistry::getPassRegistry()); #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5 @@ -231,6 +232,7 @@ namespace gbe { builder->SetInsertPoint(++next); } + ir::Unit *unit; DenseMap<Value*, VectorValues> vectorVals; Module* module; IRBuilder<>* builder; @@ -465,6 +467,7 @@ namespace gbe { gatherComponents(i, args, callArgs); Instruction* res = createScalarInstruction(inst, callArgs); + if (unit) unit->insertVectorElement(inst, res, i); vVals.setComponent(i, res); builder->Insert(res); @@ -765,6 +768,10 @@ namespace gbe { bool Scalarize::runOnFunction(Function& F) { + if (unit) { + unit->setCurrLLVMFunction(&F); + unit->clearVectorMap(); + } switch (F.getCallingConv()) { #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2 case CallingConv::PTX_Device: @@ -856,9 +863,9 @@ namespace gbe { { return; } - FunctionPass* createScalarizePass() + FunctionPass* createScalarizePass(ir::Unit *unit) { - return new Scalarize(); + return new Scalarize(unit); } char Scalarize::ID = 0; diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp index 9282b3f3..80f6bd67 100644 --- a/backend/src/llvm/llvm_to_gen.cpp +++ b/backend/src/llvm/llvm_to_gen.cpp @@ -204,7 +204,7 @@ namespace gbe passes.add(createLowerSwitchPass()); passes.add(createPromoteMemoryToRegisterPass()); passes.add(createGVNPass()); // Remove redundancies - passes.add(createScalarizePass()); // Expand all vector ops + 
passes.add(createScalarizePass(&unit)); // Expand all vector ops passes.add(createDeadInstEliminationPass()); // Remove simplified instructions passes.add(createCFGSimplificationPass()); // Merge & remove BBs passes.add(createScalarizePass()); // Expand all vector ops |