summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPan Xiuli <xiuli.pan@intel.com>2017-03-23 15:13:01 +0800
committerYang Rong <rong.r.yang@intel.com>2017-03-23 17:42:10 +0800
commita7dc7692a4f00f33c09a2ad5bc7b58bb9fdd43e5 (patch)
tree27ea64285dfe507c9fd84b6b651cf96589db7cec
parentfcad355084485b643506357bd940a59652433bf7 (diff)
Backend: Add hole reuse in reg alloction
We first find regs that have pool in simple linear scale, and save them in HoleRegPool, when allocte regs we first try to search fit candidate in the pool and choose the most fit one to reuse. V2: Refine hole reuse only in one block. V3: Refine data structure with less variable, add OCL_REUSE_HOLE_REG to control the optimization. V4: Spilt the patch into instruction ID part and hole reuse, refine the blockID of the reg. V5: Refine some variable and function name. Add check for not spill the hole regs that already been used. V6: Fix some case when the dst is partial write. V7: Fix hole spill dead loop. Signed-off-by: Pan Xiuli <xiuli.pan@intel.com> Reviewed-by: Ruiling Song <ruiling.song@intel.com>
-rw-r--r--backend/src/backend/gen_reg_allocation.cpp127
-rw-r--r--backend/src/backend/gen_reg_allocation.hpp11
2 files changed, 121 insertions, 17 deletions
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index d88b316e..9183a24b 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -50,12 +50,13 @@ namespace gbe
struct GenRegInterval {
INLINE GenRegInterval(ir::Register reg) :
reg(reg), minID(INT_MAX), maxID(-INT_MAX), accessCount(0),
- conflictReg(0), b3OpAlign(0) {}
+ blockID(-1), conflictReg(0), b3OpAlign(0), usedHole(false), isHole(false){}
ir::Register reg; //!< (virtual) register of the interval
int32_t minID, maxID; //!< Starting and ending points
int32_t accessCount;
+ int32_t blockID; //!< blockID for in-block regs that can reuse hole
ir::Register conflictReg; // < has banck conflict with this register
- bool b3OpAlign;
+ bool b3OpAlign, usedHole, isHole;
};
struct SpillInterval {
@@ -127,7 +128,7 @@ namespace gbe
/*! Allocate the vectors detected in the instruction selection pass */
void allocateVector(Selection &selection);
/*! Allocate the given interval. Return true if success */
- bool createGenReg(const Selection &selection, const GenRegInterval &interval);
+ bool createGenReg(const Selection &selection, GenRegInterval &interval);
/*! Indicate if the registers are already allocated in vectors */
bool isAllocated(const SelectionVector *vector) const;
/*! Reallocate registers if needed to make the registers in the vector
@@ -167,6 +168,8 @@ namespace gbe
uint32_t reservedReg;
/*! Current vector to expire */
uint32_t expiringID;
+ /*! Hole regs that can be reused */
+ map<uint32_t, vector<HoleRegTag>> HoleRegPool;
INLINE void insertNewReg(const Selection &selection, ir::Register reg, uint32_t grfOffset, bool isVector = false);
INLINE bool expireReg(ir::Register reg);
INLINE bool spillAtInterval(GenRegInterval interval, int size, uint32_t alignment);
@@ -281,14 +284,66 @@ namespace gbe
}
}
- bool GenRegAllocator::Opaque::createGenReg(const Selection &selection, const GenRegInterval &interval) {
+ INLINE float IDFitness(int a, int b) {
+ if (a >= b) return 1.0f/(a - b + 1);
+ else return 2.0f;
+ }
+
+ INLINE float getHoleRegFitness(const GenRegInterval &interval, HoleRegTag &holeRegTag) {
+ int regstID = interval.minID;
+ int regendID = interval.maxID;
+ int holeregstID = holeRegTag.startID;
+ int holeregendID = holeRegTag.endID;
+ return IDFitness(regstID, holeregstID) + IDFitness(holeregendID, regendID);
+ }
+
+ BVAR(OCL_REUSE_HOLE_REG, 1);
+ bool GenRegAllocator::Opaque::createGenReg(const Selection &selection, GenRegInterval &interval) {
using namespace ir;
const ir::Register reg = interval.reg;
- if (RA.contains(reg) == true)
- return true; // already allocated
uint32_t regSize;
ir::RegisterFamily family;
getRegAttrib(reg, regSize, &family);
+ // Check if the reg is live only in one block, thus can use the hole
+ int blockID = interval.blockID;
+ if (blockID >= 0 && OCL_REUSE_HOLE_REG) {
+ uint32_t useID = interval.maxID;
+ auto holepool = this->HoleRegPool.find(blockID);
+ // Use block ID as index to get candidate hole reg to reuse
+ if (holepool != this->HoleRegPool.end()) {
+ auto &holepoolvec = holepool->second;
+ HoleRegTag* holeregbest = NULL;
+ float lastfitness = 0;
+ for (auto itr = holepoolvec.begin() ; itr != holepoolvec.end(); ++itr) {
+ if (regSize != itr->regSize)
+ continue;
+ float fitness = getHoleRegFitness(interval, *itr);
+ // reg out of range of holepool reg
+ if (fitness > 2.0f) continue;
+ if (fitness > lastfitness) {
+ lastfitness = fitness;
+ holeregbest = &*itr;
+ }
+ // reg has one prefect fit use this
+ if (fitness > 1 ) break;
+ }
+ // Reuse the hole and update the holeregpool
+ if (holeregbest) {
+ auto holereg = holeregbest->reg;
+ holeregbest->startID = useID + 1;
+ int32_t grfOffset = -1;
+ if (RA.contains(holereg)) {
+ //uint32_t grfOffset = RA.find(holereg)->second;
+ grfOffset = RA.find(holereg)->second;
+ RA.insert(std::make_pair(reg, grfOffset));
+ interval.usedHole= true;
+ intervals[holereg].usedHole = true;
+ }
+ }
+ }
+ }
+ if (RA.contains(reg) == true)
+ return true; // already allocated
uint32_t grfOffset = allocateReg(interval, regSize, regSize);
if (grfOffset == 0) {
return false;
@@ -738,7 +793,7 @@ namespace gbe
ctx.errCode = REGISTER_ALLOCATION_FAIL;
const uint32_t regNum = ctx.sel->getRegNum();
for (uint32_t startID = 0; startID < regNum; ++startID) {
- const GenRegInterval &interval = *this->starting[startID];
+ GenRegInterval &interval = *this->starting[startID];
const ir::Register reg = interval.reg;
if (interval.maxID == -INT_MAX)
@@ -856,6 +911,8 @@ namespace gbe
INLINE bool GenRegAllocator::Opaque::expireReg(ir::Register reg)
{
+ if (this->intervals[reg].usedHole && !this->intervals[reg].isHole)
+ return true;
auto it = RA.find(reg);
if (flagBooleans.contains(reg))
return false;
@@ -1076,11 +1133,16 @@ namespace gbe
break;
}
} else {
- spillSet.insert(reg);
- uint32_t s;
- getRegAttrib(reg, s);
- spillCostTotal += contiguousIter->cost;
- remainSize -= s;
+ // If one hole reg is used by some reg, we should not spill it
+ if (!(intervals[reg].isHole && intervals[reg].usedHole)) {
+ spillSet.insert(reg);
+ uint32_t s;
+ getRegAttrib(reg, s);
+ spillCostTotal += contiguousIter->cost;
+ remainSize -= s;
+ }
+ else
+ break;
}
if (remainSize <= 0)
break;
@@ -1234,10 +1296,12 @@ namespace gbe
}
// Compute the intervals
- int32_t insnID = 0;
+ int insnID = 0;
+ int blockID = 0;
for (auto &block : *selection.blockList) {
+ map<int32_t, BlockRegInterval> blockIntervals;
int32_t lastID = insnID;
- int32_t firstID = insnID;
+ int32_t firstID = insnID ? insnID + 2 : insnID;
// Update the intervals of each used register. Note that we do not
// register allocate R0, so we skip all sub-registers in r0
RegIntervalMap *boolsMap = new RegIntervalMap;
@@ -1276,6 +1340,10 @@ namespace gbe
insnsrcID -= 1;
this->intervals[reg].minID = std::min(this->intervals[reg].minID, insnsrcID);
this->intervals[reg].maxID = std::max(this->intervals[reg].maxID, insnsrcID);
+ BlockRegInterval &blockRegInterval = blockIntervals[reg];
+ blockRegInterval.useID = insnsrcID;
+ if (this->intervals[reg].blockID > 0 && this->intervals[reg].blockID != blockID)
+ this->intervals[reg].blockID = -1;
}
for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {
const GenRegister &selReg = insn.dst(dstID);
@@ -1291,6 +1359,26 @@ namespace gbe
}
this->intervals[reg].minID = std::min(this->intervals[reg].minID, insnID);
this->intervals[reg].maxID = std::max(this->intervals[reg].maxID, insnID);
+ if ( this->intervals[reg].blockID == -1)
+ this->intervals[reg].blockID = blockID;
+ // Only find hole reg that not in ocl registers
+ if ( !selReg.subphysical && blockIntervals.find(reg) != blockIntervals.end() && reg >= ir::ocl::regNum ) {
+ BlockRegInterval &blockRegInterval = blockIntervals[reg];
+ int useID = blockRegInterval.useID;
+ // Check if the reg will leave a hole and add it to HoleRegPool
+ if (useID >= 0 && useID < insnID - 1 && reg >= ir::ocl::regNum) {
+ uint32_t regSize;
+ ir::RegisterFamily family;
+ getRegAttrib(reg, regSize, &family);
+ HoleRegTag holeregtag;
+ holeregtag.reg = reg;
+ holeregtag.startID = useID + 1;
+ holeregtag.endID = insnID - 1;
+ holeregtag.regSize = regSize;
+ this->HoleRegPool[blockID].push_back(holeregtag);
+ this->intervals[reg].isHole = true;
+ }
+ }
}
// OK, a flag is used as a predicate or a conditional modifier
@@ -1336,18 +1424,23 @@ namespace gbe
// All registers alive at the begining of the block must update their intervals.
const ir::BasicBlock *bb = block.bb;
bbLastInsnIDMap.insert(std::make_pair(bb, lastID));
- for (auto reg : ctx.getLiveIn(bb))
+ for (auto reg : ctx.getLiveIn(bb)) {
this->intervals[reg].minID = std::min(this->intervals[reg].minID, firstID);
-
+ this->intervals[reg].blockID = -1;
+ }
// All registers alive at the end of the block must have their intervals
// updated as well
- for (auto reg : ctx.getLiveOut(bb))
+ for (auto reg : ctx.getLiveOut(bb)) {
this->intervals[reg].maxID = std::max(this->intervals[reg].maxID, lastID);
+ this->intervals[reg].blockID = -1;
+ }
if (boolsMap->size() > 0)
boolIntervalsMap.insert(std::make_pair(&block, boolsMap));
else
delete boolsMap;
+
+ blockID++;
}
for (auto &it : this->intervals) {
diff --git a/backend/src/backend/gen_reg_allocation.hpp b/backend/src/backend/gen_reg_allocation.hpp
index 8d5e7977..3c185396 100644
--- a/backend/src/backend/gen_reg_allocation.hpp
+++ b/backend/src/backend/gen_reg_allocation.hpp
@@ -40,6 +40,17 @@ namespace gbe
int32_t addr;
} SpillRegTag;
+ typedef struct HoleRegTag {
+ uint32_t startID;
+ uint32_t endID;
+ uint32_t regSize;
+ ir::Register reg;
+ }HoleRegTag;
+
+ typedef struct BlockRegInterval {
+ int32_t blockID, defID, useID;
+ }BlockRegInterval;
+
typedef map<ir::Register, SpillRegTag> SpilledRegs;
/*! Register allocate (i.e. virtual to physical register mapping) */