summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZhigang Gong <zhigang.gong@intel.com>2014-05-15 14:04:59 +0800
committerZhigang Gong <zhigang.gong@intel.com>2014-05-21 17:55:35 +0800
commit4722ef3cde032584f8295b87c470ca874162a733 (patch)
tree87f83d97062662ee9df38a03decf8881bd4c70df
parent31f28c8d9852fafdeb113a60597a5dfd60b89683 (diff)
GBE: preparation to mix simd16 into simd8 kernel.mixsimd16
This patch modifies the scalarize pass to collect the IR layer vector information and pass that information to the backend. The backend will create two types of selection vector: one is the general selection vector, which must be in a contiguous register region, and the other is the IR layer vector, which is better placed in a contiguous register region. Signed-off-by: Zhigang Gong <zhigang.gong@intel.com>
-rw-r--r--backend/src/backend/gen_insn_selection.cpp54
-rw-r--r--backend/src/backend/gen_insn_selection.hpp44
-rw-r--r--backend/src/backend/gen_reg_allocation.cpp81
-rw-r--r--backend/src/ir/function.hpp26
-rw-r--r--backend/src/ir/unit.cpp1
-rw-r--r--backend/src/ir/unit.hpp39
-rw-r--r--backend/src/llvm/llvm_gen_backend.cpp14
-rw-r--r--backend/src/llvm/llvm_gen_backend.hpp3
-rw-r--r--backend/src/llvm/llvm_scalarize.cpp13
-rw-r--r--backend/src/llvm/llvm_to_gen.cpp2
10 files changed, 226 insertions, 51 deletions
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 745bb199..1a0827bf 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -187,14 +187,6 @@ namespace gbe
}
///////////////////////////////////////////////////////////////////////////
- // SelectionVector
- ///////////////////////////////////////////////////////////////////////////
-
- SelectionVector::SelectionVector(void) :
- insn(NULL), reg(NULL), regNum(0), isSrc(0)
- {}
-
- ///////////////////////////////////////////////////////////////////////////
// SelectionBlock
///////////////////////////////////////////////////////////////////////////
@@ -1058,9 +1050,13 @@ namespace gbe
insn->extra.elem = bti;
SelectionVector *vector = this->appendVector();
+#if 0
vector->regNum = srcNum;
vector->reg = &insn->src(0);
vector->isSrc = 1;
+#else
+ vector->setVectorReg(&insn->src(0), srcNum, true);
+#endif
}
void Selection::Opaque::EOT(void) { this->appendInsn(SEL_OP_EOT, 0, 0); }
@@ -1090,6 +1086,7 @@ namespace gbe
insn->extra.elem = valueNum;
// Only the temporary registers need contiguous allocation
+#if 0
dstVector->regNum = elemNum - valueNum;
dstVector->isSrc = 0;
dstVector->reg = &insn->dst(0);
@@ -1098,6 +1095,10 @@ namespace gbe
srcVector->regNum = 1;
srcVector->isSrc = 1;
srcVector->reg = &insn->src(0);
+#else
+ dstVector->setVectorReg(&insn->dst(0), elemNum - valueNum, false);
+ srcVector->setVectorReg(&insn->src(0), 1, true);
+#endif
}
void Selection::Opaque::UNTYPED_READ(Reg addr,
@@ -1116,7 +1117,7 @@ namespace gbe
insn->src(0) = addr;
insn->extra.function = bti;
insn->extra.elem = elemNum;
-
+#if 0
// Sends require contiguous allocation
dstVector->regNum = elemNum;
dstVector->isSrc = 0;
@@ -1125,6 +1126,10 @@ namespace gbe
srcVector->regNum = 1;
srcVector->isSrc = 1;
srcVector->reg = &insn->src(0);
+#else
+ dstVector->setVectorReg(&insn->dst(0), elemNum, false);
+ srcVector->setVectorReg(&insn->src(0), 1, true);
+#endif
}
/* elemNum contains all the temporary register and the
@@ -1149,9 +1154,13 @@ namespace gbe
insn->extra.elem = srcNum;
// Only the addr + temporary registers need to be contiguous.
+#if 0
vector->regNum = dstNum;
vector->reg = &insn->dst(0);
vector->isSrc = 1;
+#else
+ vector->setVectorReg(&insn->dst(0), dstNum, false);
+#endif
}
void Selection::Opaque::UNTYPED_WRITE(Reg addr,
@@ -1170,9 +1179,13 @@ namespace gbe
insn->extra.elem = elemNum;
// Sends require contiguous allocation for the sources
+#if 0
vector->regNum = elemNum+1;
vector->reg = &insn->src(0);
vector->isSrc = 1;
+#else
+ vector->setVectorReg(&insn->src(0), elemNum + 1, true); // source vector: same registers as the disabled code above, not dst(0)
+#endif
}
void Selection::Opaque::BYTE_GATHER(Reg dst, Reg addr, uint32_t elemSize, uint32_t bti) {
@@ -1190,12 +1203,17 @@ namespace gbe
// byte gather requires vector in the sense that scalar are not allowed
// (yet)
+#if 0
dstVector->regNum = 1;
dstVector->isSrc = 0;
dstVector->reg = &insn->dst(0);
srcVector->regNum = 1;
srcVector->isSrc = 1;
srcVector->reg = &insn->src(0);
+#else
+ dstVector->setVectorReg(&insn->dst(0), 1, false);
+ srcVector->setVectorReg(&insn->src(0), 1, true);
+#endif
}
void Selection::Opaque::BYTE_SCATTER(Reg addr, Reg src, uint32_t elemSize, uint32_t bti) {
@@ -1209,9 +1227,13 @@ namespace gbe
insn->extra.elem = elemSize;
// value and address are contiguous in the send
+#if 0
vector->regNum = 2;
vector->isSrc = 1;
vector->reg = &insn->src(0);
+#else
+ vector->setVectorReg(&insn->src(0), 2, true);
+#endif
}
void Selection::Opaque::DWORD_GATHER(Reg dst, Reg addr, uint32_t bti) {
@@ -1224,12 +1246,17 @@ namespace gbe
insn->src(0) = addr;
insn->dst(0) = dst;
insn->extra.function = bti;
+#if 0
vector->regNum = 1;
vector->isSrc = 0;
vector->reg = &insn->dst(0);
srcVector->regNum = 1;
srcVector->isSrc = 1;
srcVector->reg = &insn->src(0);
+#else
+ vector->setVectorReg(&insn->dst(0), 1, false);
+ srcVector->setVectorReg(&insn->src(0), 1, true);
+#endif
}
void Selection::Opaque::UNPACK_BYTE(const GenRegister *dst, const GenRegister src, uint32_t elemNum) {
@@ -1600,6 +1627,7 @@ namespace gbe
for (uint32_t elemID = 0; elemID < msgNum; ++elemID)
insn->src(elemID) = msgPayloads[elemID];
+#if 0
// Sends require contiguous allocation
dstVector->regNum = dstNum;
dstVector->isSrc = 0;
@@ -1609,6 +1637,10 @@ namespace gbe
msgVector->regNum = msgNum;
msgVector->isSrc = 1;
msgVector->reg = &insn->src(0);
+#else
+ dstVector->setVectorReg(&insn->dst(0), dstNum, false);
+ msgVector->setVectorReg(&insn->src(0), msgNum, true);
+#endif
insn->extra.rdbti = bti;
insn->extra.sampler = sampler;
@@ -1638,10 +1670,14 @@ namespace gbe
insn->extra.bti = bti;
insn->extra.msglen = msgNum;
insn->extra.is3DWrite = is3D;
+#if 0
// Sends require contiguous allocation
msgVector->regNum = msgNum;
msgVector->isSrc = 1;
msgVector->reg = &insn->src(0);
+#else
+ msgVector->setVectorReg(&insn->src(0), msgNum, true);
+#endif
}
Selection::~Selection(void) { GBE_DELETE(this->opaque); }
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 1f48b235..1163b0d3 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -149,23 +149,53 @@ namespace gbe
friend class Selection;
};
+ // Owns the selection block
+ class Selection;
+
/*! Instructions like sends require to make registers contiguous in GRF */
class SelectionVector : public NonCopyable, public intrusive_list_node
{
public:
- SelectionVector(void);
+ SelectionVector(void) : insn(NULL), reg(NULL), regNum(0),
+ issrc(false), isirvector(false) {} //!< empty vector: no instruction, no registers, not an IR vector
+
/*! The instruction that requires the vector of registers */
SelectionInstruction *insn;
- /*! Directly points to the selection instruction registers */
- GenRegister *reg;
+ const ir::Register getReg(uint32_t regID) const { //!< register number regID, read through irReg or reg depending on isirvector
+ return isirvector ? irReg[regID] : reg[regID].reg();
+ }
+ const uint32_t getNum(void) const { return regNum; } //!< number of registers in the vector
+ bool isSrc(void) const { return issrc; } //!< value of the src flag given to the GenRegister setVectorReg overload
+ bool isIRVector(void) const { return isirvector; } //!< true once the ir::Register setVectorReg overload has been used
+
+ INLINE void setVectorReg(const ir::Register *ir, uint32_t num) { //!< describe an IR layer vector of num registers starting at ir
+ irReg = ir; // only the pointer is stored; the array is not copied
+ regNum = num;
+ isirvector = true; // insn and issrc are left untouched by this overload
+ }
+
+ private:
+ union {
+ /*! Directly points to the selection instruction registers, only valid when isIRVector == false */
+ const GenRegister *reg;
+ /*! Points to the registers of an IR vector, only valid when isIRVector == true */
+ const ir::Register *irReg;
+ };
+ INLINE void setVectorReg(GenRegister *r, uint32_t num, bool src) { //!< describe a selection vector of num registers starting at r
+ reg = r;
+ regNum = num;
+ isirvector = false;
+ issrc = src;
+ }
/*! Number of registers in the vector */
uint16_t regNum;
/*! Indicate if this a destination or a source vector */
- uint16_t isSrc;
- };
+ bool issrc;
+ /*! Indicate if this is a LLVM IR layer vector. */
+ bool isirvector;
+ friend class Selection; // presumably so Selection::Opaque can call the private setVectorReg overload — confirm
- // Owns the selection block
- class Selection;
+ };
/*! A selection block is the counterpart of the IR Basic block. It contains
* the instructions generated from an IR basic block
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index f642c2e4..f5dd9381 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -154,10 +154,11 @@ namespace gbe
map<ir::Register, uint32_t> RA;
/*! Map offset to virtual registers. */
map<uint32_t, ir::Register> offsetReg;
- /*! Provides the position of each register in a vector */
+ /*! Provides the position of each register in a selection vector */
map<ir::Register, VectorLocation> vectorMap;
/*! All vectors used in the selection */
vector<SelectionVector*> vectors;
+ vector<vector<ir::Register>*> irVectors;
/*! The set of booleans that will go to GRF (cannot be kept into flags) */
set<ir::Register> grfBooleans;
/*! The set of booleans which be held in flags, don't need to allocate grf */
@@ -265,7 +266,7 @@ namespace gbe
}
bool GenRegAllocator::Opaque::isAllocated(const SelectionVector *vector) const {
- const ir::Register first = vector->reg[0].reg();
+ const ir::Register first = vector->getReg(0);
const auto it = vectorMap.find(first);
// If the first register is not allocated we are done
@@ -276,15 +277,15 @@ namespace gbe
// still registers to allocate
const SelectionVector *other = it->second.first;
const uint32_t otherFirst = it->second.second;
- const uint32_t leftNum = other->regNum - otherFirst;
- if (leftNum < vector->regNum)
+ const uint32_t leftNum = other->getNum() - otherFirst;
+ if (leftNum < vector->getNum())
return false;
// Now check that all the registers in the already allocated vector match
// the current vector
- for (uint32_t regID = 1; regID < vector->regNum; ++regID) {
- const ir::Register from = vector->reg[regID].reg();
- const ir::Register to = other->reg[regID + otherFirst].reg();
+ for (uint32_t regID = 1; regID < vector->getNum(); ++regID) {
+ const ir::Register from = vector->getReg(regID);
+ const ir::Register to = other->getReg(regID + otherFirst);
if (from != to)
return false;
}
@@ -292,8 +293,8 @@ namespace gbe
}
void GenRegAllocator::Opaque::coalesce(Selection &selection, SelectionVector *vector) {
- for (uint32_t regID = 0; regID < vector->regNum; ++regID) {
- const ir::Register reg = vector->reg[regID].reg();
+ for (uint32_t regID = 0; regID < vector->getNum(); ++regID) {
+ const ir::Register reg = vector->getReg(regID);
const auto it = this->vectorMap.find(reg);
// case 1: the register is not already in a vector, so it can stay in this
// vector. Note that local IDs are *non-scalar* special registers but will
@@ -315,21 +316,43 @@ namespace gbe
// and the order is maintained, we can reuse the previous vector and avoid
// the MOVs
else {
- ir::Register tmp;
- tmp = this->replaceReg(selection, vector->insn, regID, vector->isSrc);
- const VectorLocation location = std::make_pair(vector, regID);
- this->vectorMap.insert(std::make_pair(tmp, location));
+ if (!vector->isIRVector()) {
+ ir::Register tmp;
+ tmp = this->replaceReg(selection, vector->insn, regID, vector->isSrc());
+ const VectorLocation location = std::make_pair(vector, regID);
+ this->vectorMap.insert(std::make_pair(tmp, location));
+ }
}
}
}
/*! Will sort vector in decreasing order */
inline bool cmp(const SelectionVector *v0, const SelectionVector *v1) {
- return v0->regNum > v1->regNum;
+ return v0->getNum() > v1->getNum();
}
void GenRegAllocator::Opaque::allocateVector(Selection &selection) {
- const uint32_t vectorNum = selection.getVectorNum();
+
+ // First, collect the IR layer vectors into a temporary array (only done when the SIMD width is 8).
+ const ir::IRVectorMap *irVectorMap = ctx.getFunction().getIRVectorMap();
+ vector<SelectionVector *> SelIRVectors;
+ if (ctx.getSimdWidth() == 8) {
+ for (auto &it : *irVectorMap) {
+ uint32_t i = 0;
+ const ir::IRVector *iv = &it.second; // points into the function's IR vector map, not a copy
+ while(i < iv->regNum) {
+ // Split each IR vector into pairs; a 2-element vector is enough for the mix simd16 optimization.
+ if (iv->regNum - i >= 2) { // a single leftover register is not turned into a vector
+ SelectionVector *sv = new SelectionVector(); // NOTE(review): plain new, and no matching delete is visible in this hunk — confirm who owns sv
+ sv->setVectorReg(&(iv->regs[i]), 2); // NOTE(review): assumes regs[i] and regs[i+1] are both set; regNum only counts inserted slots — confirm there are no (Register) -1 holes
+ SelIRVectors.push_back(sv);
+ i += 2;
+ } else
+ break;
+ }
+ }
+ }
+ const uint32_t vectorNum = selection.getVectorNum() + SelIRVectors.size();
this->vectors.resize(vectorNum);
// First we find and store all vectors
@@ -337,6 +360,10 @@ namespace gbe
for (auto &block : *selection.blockList)
for (auto &v : block.vectorList)
this->vectors[vectorID++] = &v;
+ // add ir vectors into the array.
+ for (auto &v : SelIRVectors)
+ this->vectors[vectorID++] = v;
+
GBE_ASSERT(vectorID == vectorNum);
// Heuristic (really simple...): sort them by the number of registers they
@@ -670,25 +697,25 @@ namespace gbe
if (it != vectorMap.end()) {
const SelectionVector *vector = it->second.first;
// all the reg in the SelectionVector are spilled
- if(spilledRegs.find(vector->reg[0].reg())
+ if(spilledRegs.find(vector->getReg(0))
!= spilledRegs.end())
continue;
uint32_t alignment;
ir::RegisterFamily family;
getRegAttrib(reg, alignment, &family);
- const uint32_t size = vector->regNum * alignment;
+ const uint32_t size = vector->getNum() * alignment;
const uint32_t grfOffset = allocateReg(interval, size, alignment);
if(grfOffset == 0) {
GBE_ASSERT(!(reservedReg && family != ir::FAMILY_DWORD));
- for(int i = vector->regNum-1; i >= 0; i--) {
- if (!spillReg(vector->reg[i].reg()))
+ for(int i = vector->getNum()-1; i >= 0; i--) {
+ if (!spillReg(vector->getReg(i)))
return false;
}
continue;
}
- for (uint32_t regID = 0; regID < vector->regNum; ++regID) {
- const ir::Register reg = vector->reg[regID].reg();
+ for (uint32_t regID = 0; regID < vector->getNum(); ++regID) {
+ const ir::Register reg = vector->getReg(regID);
GBE_ASSERT(RA.contains(reg) == false
&& ctx.sel->getRegisterData(reg).family == family);
insertNewReg(reg, grfOffset + alignment * regID, true);
@@ -837,8 +864,8 @@ namespace gbe
// If a partial of a vector is expired, the vector will be unspillable, currently.
// FIXME we may need to fix those unspillable vector in the furture.
INLINE bool GenRegAllocator::Opaque::vectorCanSpill(SelectionVector *vector) {
- for(uint32_t id = 0; id < vector->regNum; id++)
- if (spillCandidate.find(intervals[(ir::Register)(vector->reg[id].value.reg)])
+ for(uint32_t id = 0; id < vector->getNum(); id++)
+ if (spillCandidate.find(intervals[(ir::Register)(vector->getReg(id))])
== spillCandidate.end())
return false;
return true;
@@ -872,11 +899,11 @@ namespace gbe
if (isVector
&& (vectorCanSpill(vectorIt->second.first))) {
const SelectionVector *vector = vectorIt->second.first;
- for (uint32_t id = 0; id < vector->regNum; id++) {
- GBE_ASSERT(spilledRegs.find(vector->reg[id].reg())
+ for (uint32_t id = 0; id < vector->getNum(); id++) {
+ GBE_ASSERT(spilledRegs.find(vector->getReg(id))
== spilledRegs.end());
- spillSet.insert(vector->reg[id].reg());
- reg = vector->reg[id].reg();
+ reg = vector->getReg(id);
+ spillSet.insert(reg);
family = ctx.sel->getRegisterFamily(reg);
size -= family == ir::FAMILY_QWORD ? 2 * GEN_REG_SIZE * ctx.getSimdWidth()/8
: GEN_REG_SIZE * ctx.getSimdWidth()/8;
diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp
index 266e6526..5fa95266 100644
--- a/backend/src/ir/function.hpp
+++ b/backend/src/ir/function.hpp
@@ -147,6 +147,14 @@ namespace ir {
GBE_STRUCT(Loop);
};
+ /*! Registers of one IR layer vector, indexed by element position. */
+ typedef struct IRVector {
+ IRVector() : regNum(0) { for(uint32_t i = 0; i < 16; i++) regs[i] = (Register) -1; } //!< every slot starts as the unset marker (Register) -1
+ uint32_t regNum; //!< number of slots filled so far
+ Register regs[16]; //!< one slot per element; (Register) -1 means not set
+ } IRVector;
+ typedef map<const void *, IRVector> IRVectorMap; //!< IR vectors keyed by an opaque vector value pointer
+
/*! A function is :
* - a register file
* - a set of basic block layout into a CGF
@@ -340,6 +348,22 @@ namespace ir {
/*! add the loop info for later liveness analysis */
void addLoop(const vector<LabelIndex> &bbs, const vector<std::pair<LabelIndex, LabelIndex>> &exits);
INLINE const vector<Loop * > &getLoops() { return loops; }
+ /* Get reg vectors which indicate which registers are in a logical vector. */
+ INLINE const IRVectorMap *getIRVectorMap(void) const { return &irVectorMap; }
+ /*! Record reg as element id of the IR vector keyed by vectorValue; an id outside regs[] is ignored. */
+ INLINE void insertIRVectorElement(const void *vectorValue, uint32_t id, Register reg) {
+ const uint32_t slotNum = sizeof(IRVector::regs) / sizeof(Register);
+ GBE_ASSERT(id < slotNum);
+ if (id >= slotNum) return; // never write past regs[], even if the assert is compiled out
+ auto it = irVectorMap.find(vectorValue);
+ // first element seen for this vector value: start from an empty IRVector
+ if (it == irVectorMap.end()) it = irVectorMap.insert(std::make_pair(vectorValue, IRVector())).first;
+ IRVector &iv = it->second;
+ GBE_ASSERT(iv.regs[id] == (Register) -1); // a slot must not be registered twice
+ iv.regs[id] = reg;
+ iv.regNum++;
+ }
+
private:
friend class Context; //!< Can freely modify a function
std::string name; //!< Function name
@@ -350,6 +374,8 @@ namespace ir {
vector<Immediate> immediates; //!< All immediate values in the function
vector<BasicBlock*> blocks; //!< All chained basic blocks
vector<Loop *> loops; //!< Loops info of the function
+ IRVectorMap irVectorMap; //!< IR vectors map
+ vector<vector<Register>> IRVectors; //!< IR vectors
RegisterFile file; //!< RegisterDatas used by the instructions
Profile profile; //!< Current function profile
PushMap pushMap; //!< Pushed function arguments (reg->loc)
diff --git a/backend/src/ir/unit.cpp b/backend/src/ir/unit.cpp
index 4f9d7400..0718a830 100644
--- a/backend/src/ir/unit.cpp
+++ b/backend/src/ir/unit.cpp
@@ -30,6 +30,7 @@ namespace ir {
Unit::Unit(PointerSize pointerSize) : pointerSize(pointerSize), valid(true) {}
Unit::~Unit(void) {
for (const auto &pair : functions) GBE_DELETE(pair.second);
+ for (const auto &it : vectorMaps) delete it.second; // each per-function VectorMap is created with plain new in insertVectorElement
}
Function *Unit::getFunction(const std::string &name) const {
auto it = functions.find(name);
diff --git a/backend/src/ir/unit.hpp b/backend/src/ir/unit.hpp
index adebd3f3..94db4be2 100644
--- a/backend/src/ir/unit.hpp
+++ b/backend/src/ir/unit.hpp
@@ -42,6 +42,13 @@ namespace ir {
{
public:
typedef hash_map<std::string, Function*> FunctionSet;
+ typedef struct VectorIndex{
+ VectorIndex(const void *v, uint32_t id) : vectorValue(v), id(id) {}
+ const void *vectorValue;
+ uint32_t id;
+ } VectorIndex;
+ typedef map<const void *, VectorIndex> VectorMap; //!< a heuristic for mix simd16 optimization.
+
/*! Create an empty unit */
Unit(PointerSize pointerSize = POINTER_32_BITS);
/*! Release everything (*including* the function pointers) */
@@ -74,8 +81,40 @@ namespace ir {
const ConstantSet& getConstantSet(void) const { return constantSet; }
void setValid(bool value) { valid = value; }
bool getValid() { return valid; }
+ /*! set curr llvm function, for scalarize and gen pass. */
+ void setCurrLLVMFunction(void *f) { function = f; }
+ /*! Record, in the current llvm function's map, that value is element id of vectorValue. */
+ void insertVectorElement(const void *vectorValue, const void *value, int id) {
+ GBE_ASSERT(function != NULL);
+ auto it = vectorMaps.find(function);
+ VectorMap *vectorMap;
+ if (it != vectorMaps.end())
+ vectorMap = it->second;
+ else {
+ vectorMap = new VectorMap(); // one map per llvm function, created on first use
+ vectorMaps.insert(std::make_pair(function, vectorMap));
+ }
+ VectorIndex vi(vectorValue, id);
+ vectorMap->insert(std::make_pair(value, vi)); // NOTE(review): insert presumably keeps an existing entry for value — confirm that is intended
+ }
+ /*! Get a value's vector index information from the current llvm function's map; NULL when nothing is recorded. */
+ const VectorIndex *getVectorIndex(void *valueKey) {
+ auto it = vectorMaps.find(function);
+ if (it == vectorMaps.end()) // no map exists for the current function
+ return NULL;
+ auto vectorMap = it->second;
+ auto vi = vectorMap->find(valueKey);
+ return vi != vectorMap->end() ? &vi->second : NULL; // the pointer refers to the entry stored inside the map
+ }
+ void clearVectorMap(void) { //!< empty the current llvm function's map; the map object itself stays allocated
+ auto it = vectorMaps.find(function);
+ if (it != vectorMaps.end())
+ it->second->clear();
+ }
private:
friend class ContextInterface; //!< Can free modify the unit
+ const void * function; //!< current llvm function.
+ map<const void *, map<const void *, VectorIndex>*> vectorMaps;
hash_map<std::string, Function*> functions; //!< All the defined functions
ConstantSet constantSet; //!< All the constants defined in the unit
PointerSize pointerSize; //!< Size shared by all pointers
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 82429d04..fbd125af 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -483,6 +483,7 @@ namespace gbe
// definitions outside the translation unit.
if (F.hasAvailableExternallyLinkage())
return false;
+ unit.setCurrLLVMFunction(&F);
// As we inline all function calls, so skip non-kernel functions
bool bKernel = isKernelFunction(F);
@@ -875,14 +876,21 @@ namespace gbe
case Type::FloatTyID:
case Type::DoubleTyID:
case Type::PointerTyID:
- regTranslator.newScalar(value, key, 0, uniform);
+ {
+ auto reg = regTranslator.newScalar(value, key, 0, uniform);
+ auto vi = unit.getVectorIndex(key == NULL ? value : key);
+ if (vi != NULL)
+ this->ctx.getFunction().insertIRVectorElement(vi->vectorValue, vi->id, reg);
break;
+ }
case Type::VectorTyID:
{
auto vectorType = cast<VectorType>(type);
const uint32_t elemNum = vectorType->getNumElements();
- for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
- regTranslator.newScalar(value, key, elemID, uniform);
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
+ auto reg = regTranslator.newScalar(value, key, elemID, uniform);
+ this->ctx.getFunction().insertIRVectorElement(value, elemID, reg);
+ }
break;
}
default: NOT_SUPPORTED;
diff --git a/backend/src/llvm/llvm_gen_backend.hpp b/backend/src/llvm/llvm_gen_backend.hpp
index 26323a3e..80c2a5ff 100644
--- a/backend/src/llvm/llvm_gen_backend.hpp
+++ b/backend/src/llvm/llvm_gen_backend.hpp
@@ -30,6 +30,7 @@
#include "sys/platform.hpp"
#include "sys/map.hpp"
#include "sys/hash_map.hpp"
+#include "ir/unit.hpp"
#include <algorithm>
// LLVM Type
@@ -88,7 +89,7 @@ namespace gbe
llvm::BasicBlockPass *createLoadStoreOptimizationPass();
/*! Scalarize all vector op instructions */
- llvm::FunctionPass* createScalarizePass();
+ llvm::FunctionPass* createScalarizePass(ir::Unit * unit = NULL);
/*! Remove/add NoDuplicate function attribute for barrier functions. */
llvm::ModulePass* createBarrierNodupPass(bool);
diff --git a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp
index 73817e2d..70dddffa 100644
--- a/backend/src/llvm/llvm_scalarize.cpp
+++ b/backend/src/llvm/llvm_scalarize.cpp
@@ -93,6 +93,7 @@
#include "llvm/llvm_gen_backend.hpp"
#include "sys/map.hpp"
+#include "ir/unit.hpp"
using namespace llvm;
@@ -124,7 +125,7 @@ namespace gbe {
// Standard pass stuff
static char ID;
- Scalarize() : FunctionPass(ID)
+ Scalarize(ir::Unit *unit = NULL) : FunctionPass(ID), unit(unit)
{
initializeLoopInfoPass(*PassRegistry::getPassRegistry());
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5
@@ -231,6 +232,7 @@ namespace gbe {
builder->SetInsertPoint(++next);
}
+ ir::Unit *unit;
DenseMap<Value*, VectorValues> vectorVals;
Module* module;
IRBuilder<>* builder;
@@ -465,6 +467,7 @@ namespace gbe {
gatherComponents(i, args, callArgs);
Instruction* res = createScalarInstruction(inst, callArgs);
+ if (unit) unit->insertVectorElement(inst, res, i);
vVals.setComponent(i, res);
builder->Insert(res);
@@ -765,6 +768,10 @@ namespace gbe {
bool Scalarize::runOnFunction(Function& F)
{
+ if (unit) {
+ unit->setCurrLLVMFunction(&F);
+ unit->clearVectorMap();
+ }
switch (F.getCallingConv()) {
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2
case CallingConv::PTX_Device:
@@ -856,9 +863,9 @@ namespace gbe {
{
return;
}
- FunctionPass* createScalarizePass()
+ FunctionPass* createScalarizePass(ir::Unit *unit)
{
- return new Scalarize();
+ return new Scalarize(unit);
}
char Scalarize::ID = 0;
diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
index 9282b3f3..80f6bd67 100644
--- a/backend/src/llvm/llvm_to_gen.cpp
+++ b/backend/src/llvm/llvm_to_gen.cpp
@@ -204,7 +204,7 @@ namespace gbe
passes.add(createLowerSwitchPass());
passes.add(createPromoteMemoryToRegisterPass());
passes.add(createGVNPass()); // Remove redundancies
- passes.add(createScalarizePass()); // Expand all vector ops
+ passes.add(createScalarizePass(&unit)); // Expand all vector ops
passes.add(createDeadInstEliminationPass()); // Remove simplified instructions
passes.add(createCFGSimplificationPass()); // Merge & remove BBs
passes.add(createScalarizePass()); // Expand all vector ops