Diffstat (limited to 'src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp')
-rw-r--r--  src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp  2050
1 file changed, 2050 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
new file mode 100644
index 00000000000..d65003ce4eb
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -0,0 +1,2050 @@
+/*
+ * Copyright 2011 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "codegen/nv50_ir.h"
+#include "codegen/nv50_ir_target.h"
+
+#include <stack>
+#include <limits>
+
+namespace nv50_ir {
+
+#define MAX_REGISTER_FILE_SIZE 256
+
+class RegisterSet
+{
+public:
+ RegisterSet(const Target *);
+
+ void init(const Target *);
+ void reset(DataFile, bool resetMax = false);
+
+ void periodicMask(DataFile f, uint32_t lock, uint32_t unlock);
+ void intersect(DataFile f, const RegisterSet *);
+
+ bool assign(int32_t& reg, DataFile f, unsigned int size);
+ void release(DataFile f, int32_t reg, unsigned int size);
+ void occupy(DataFile f, int32_t reg, unsigned int size);
+ void occupy(const Value *);
+ void occupyMask(DataFile f, int32_t reg, uint8_t mask);
+ bool isOccupied(DataFile f, int32_t reg, unsigned int size) const;
+ bool testOccupy(const Value *);
+ bool testOccupy(DataFile f, int32_t reg, unsigned int size);
+
+ inline int getMaxAssigned(DataFile f) const { return fill[f]; }
+
+ inline unsigned int getFileSize(DataFile f, uint8_t regSize) const
+ {
+ if (restrictedGPR16Range && f == FILE_GPR && regSize == 2)
+ return (last[f] + 1) / 2;
+ return last[f] + 1;
+ }
+
+ inline unsigned int units(DataFile f, unsigned int size) const
+ {
+ return size >> unit[f];
+ }
+ // for regs of size >= 4, id is counted in 4-byte words (like nv50/c0 binary)
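+ // Worked example (illustrative): a b64 value with reg.data.id == 2
+ // occupies bytes [8, 16), so idToBytes() == 8 and bytesToId(v, 8) == 2.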
+ inline unsigned int idToBytes(const Value *v) const
+ {
+ return v->reg.data.id * MIN2(v->reg.size, 4);
+ }
+ inline unsigned int idToUnits(const Value *v) const
+ {
+ return units(v->reg.file, idToBytes(v));
+ }
+ inline int bytesToId(Value *v, unsigned int bytes) const
+ {
+ if (v->reg.size < 4)
+ return units(v->reg.file, bytes);
+ return bytes / 4;
+ }
+ inline int unitsToId(DataFile f, int u, uint8_t size) const
+ {
+ if (u < 0)
+ return -1;
+ return (size < 4) ? u : ((u << unit[f]) / 4);
+ }
+
+ void print() const;
+
+private:
+ BitSet bits[LAST_REGISTER_FILE + 1];
+
+ int unit[LAST_REGISTER_FILE + 1]; // log2 of allocation granularity
+
+ int last[LAST_REGISTER_FILE + 1];
+ int fill[LAST_REGISTER_FILE + 1];
+
+ const bool restrictedGPR16Range;
+};
+
+void
+RegisterSet::reset(DataFile f, bool resetMax)
+{
+ bits[f].fill(0);
+ if (resetMax)
+ fill[f] = -1;
+}
+
+void
+RegisterSet::init(const Target *targ)
+{
+ for (unsigned int rf = 0; rf <= FILE_ADDRESS; ++rf) {
+ DataFile f = static_cast<DataFile>(rf);
+ last[rf] = targ->getFileSize(f) - 1;
+ unit[rf] = targ->getFileUnit(f);
+ fill[rf] = -1;
+ assert(last[rf] < MAX_REGISTER_FILE_SIZE);
+ bits[rf].allocate(last[rf] + 1, true);
+ }
+}
+
+RegisterSet::RegisterSet(const Target *targ)
+ : restrictedGPR16Range(targ->getChipset() < 0xc0)
+{
+ init(targ);
+ for (unsigned int i = 0; i <= LAST_REGISTER_FILE; ++i)
+ reset(static_cast<DataFile>(i));
+}
+
+void
+RegisterSet::periodicMask(DataFile f, uint32_t lock, uint32_t unlock)
+{
+ bits[f].periodicMask32(lock, unlock);
+}
+
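+// NOTE: occupancy bits are OR'ed, since the intersection of two sets of
+// free registers is the union of their occupied bits.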
+void
+RegisterSet::intersect(DataFile f, const RegisterSet *set)
+{
+ bits[f] |= set->bits[f];
+}
+
+void
+RegisterSet::print() const
+{
+ INFO("GPR:");
+ bits[FILE_GPR].print();
+ INFO("\n");
+}
+
+bool
+RegisterSet::assign(int32_t& reg, DataFile f, unsigned int size)
+{
+ reg = bits[f].findFreeRange(size);
+ if (reg < 0)
+ return false;
+ fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1));
+ return true;
+}
+
+bool
+RegisterSet::isOccupied(DataFile f, int32_t reg, unsigned int size) const
+{
+ return bits[f].testRange(reg, size);
+}
+
+void
+RegisterSet::occupy(const Value *v)
+{
+ occupy(v->reg.file, idToUnits(v), v->reg.size >> unit[v->reg.file]);
+}
+
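+// Occupy an explicit bit pattern at a 32-bit-word-aligned position,
+// e.g. reg == 10 with mask == 0x3 marks units 10 and 11 as occupied.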
+void
+RegisterSet::occupyMask(DataFile f, int32_t reg, uint8_t mask)
+{
+ bits[f].setMask(reg & ~31, static_cast<uint32_t>(mask) << (reg % 32));
+}
+
+void
+RegisterSet::occupy(DataFile f, int32_t reg, unsigned int size)
+{
+ bits[f].setRange(reg, size);
+
+ INFO_DBG(0, REG_ALLOC, "reg occupy: %u[%i] %u\n", f, reg, size);
+
+ fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1));
+}
+
+bool
+RegisterSet::testOccupy(const Value *v)
+{
+ return testOccupy(v->reg.file,
+ idToUnits(v), v->reg.size >> unit[v->reg.file]);
+}
+
+bool
+RegisterSet::testOccupy(DataFile f, int32_t reg, unsigned int size)
+{
+ if (isOccupied(f, reg, size))
+ return false;
+ occupy(f, reg, size);
+ return true;
+}
+
+void
+RegisterSet::release(DataFile f, int32_t reg, unsigned int size)
+{
+ bits[f].clrRange(reg, size);
+
+ INFO_DBG(0, REG_ALLOC, "reg release: %u[%i] %u\n", f, reg, size);
+}
+
+class RegAlloc
+{
+public:
+ RegAlloc(Program *program) : prog(program), sequence(0) { }
+
+ bool exec();
+ bool execFunc();
+
+private:
+ class PhiMovesPass : public Pass {
+ private:
+ virtual bool visit(BasicBlock *);
+ inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p);
+ };
+
+ class ArgumentMovesPass : public Pass {
+ private:
+ virtual bool visit(BasicBlock *);
+ };
+
+ class BuildIntervalsPass : public Pass {
+ private:
+ virtual bool visit(BasicBlock *);
+ void collectLiveValues(BasicBlock *);
+ void addLiveRange(Value *, const BasicBlock *, int end);
+ };
+
+ class InsertConstraintsPass : public Pass {
+ public:
+ bool exec(Function *func);
+ private:
+ virtual bool visit(BasicBlock *);
+
+ bool insertConstraintMoves();
+
+ void condenseDefs(Instruction *);
+ void condenseSrcs(Instruction *, const int first, const int last);
+
+ void addHazard(Instruction *i, const ValueRef *src);
+ void textureMask(TexInstruction *);
+ void addConstraint(Instruction *, int s, int n);
+ bool detectConflict(Instruction *, int s);
+
+ // target specific functions, TODO: put in subclass or Target
+ void texConstraintNV50(TexInstruction *);
+ void texConstraintNVC0(TexInstruction *);
+ void texConstraintNVE0(TexInstruction *);
+
+ std::list<Instruction *> constrList;
+
+ const Target *targ;
+ };
+
+ bool buildLiveSets(BasicBlock *);
+
+private:
+ Program *prog;
+ Function *func;
+
+ // instructions in control flow / chronological order
+ ArrayList insns;
+
+ int sequence; // for manual passes through CFG
+};
+
+typedef std::pair<Value *, Value *> ValuePair;
+
+class SpillCodeInserter
+{
+public:
+ SpillCodeInserter(Function *fn) : func(fn), stackSize(0), stackBase(0) { }
+
+ bool run(const std::list<ValuePair>&);
+
+ Symbol *assignSlot(const Interval&, const unsigned int size);
+ inline int32_t getStackSize() const { return stackSize; }
+
+private:
+ Function *func;
+
+ struct SpillSlot
+ {
+ Interval occup;
+ std::list<Value *> residents; // needed to recalculate occup
+ Symbol *sym;
+ int32_t offset;
+ inline uint8_t size() const { return sym->reg.size; }
+ };
+ std::list<SpillSlot> slots;
+ int32_t stackSize;
+ int32_t stackBase;
+
+ LValue *unspill(Instruction *usei, LValue *, Value *slot);
+ void spill(Instruction *defi, Value *slot, LValue *);
+};
+
+void
+RegAlloc::BuildIntervalsPass::addLiveRange(Value *val,
+ const BasicBlock *bb,
+ int end)
+{
+ Instruction *insn = val->getUniqueInsn();
+
+ if (!insn)
+ insn = bb->getFirst();
+
+ assert(bb->getFirst()->serial <= bb->getExit()->serial);
+ assert(bb->getExit()->serial + 1 >= end);
+
+ int begin = insn->serial;
+ if (begin < bb->getEntry()->serial || begin > bb->getExit()->serial)
+ begin = bb->getEntry()->serial;
+
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "%%%i <- live range [%i(%i), %i)\n",
+ val->id, begin, insn->serial, end);
+
+ if (begin != end) // empty ranges are only added as hazards for fixed regs
+ val->livei.extend(begin, end);
+}
+
+bool
+RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p)
+{
+ if (b->cfg.incidentCount() <= 1)
+ return false;
+
+ int n = 0;
+ for (Graph::EdgeIterator ei = p->cfg.outgoing(); !ei.end(); ei.next())
+ if (ei.getType() == Graph::Edge::TREE ||
+ ei.getType() == Graph::Edge::FORWARD)
+ ++n;
+ return (n == 2);
+}
+
+// For each operand of each PHI in b, generate a new value by inserting a MOV
+// at the end of the block it comes from and replace the operand with the
+// MOV's result. This eliminates liveness conflicts and, where a conflict
+// remains, still lets the value be copied into the right register.
+//
+// These MOVs are also crucial in making sure the live intervals of phi
+// sources are extended until the end of the loop, since they are not
+// included in the live-in sets.
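+//
+// Illustrative sketch (value names assumed): for
+//   BB3: %a = phi %b (from BB1) %c (from BB2)
+// a "mov %b2 %b" is appended to BB1 and a "mov %c2 %c" to BB2, and the
+// phi becomes "%a = phi %b2 %c2".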
+bool
+RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
+{
+ Instruction *phi, *mov;
+ BasicBlock *pb, *pn;
+
+ std::stack<BasicBlock *> stack;
+
+ for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
+ pb = BasicBlock::get(ei.getNode());
+ assert(pb);
+ if (needNewElseBlock(bb, pb))
+ stack.push(pb);
+ }
+ while (!stack.empty()) {
+ pb = stack.top();
+ pn = new BasicBlock(func);
+ stack.pop();
+
+ pb->cfg.detach(&bb->cfg);
+ pb->cfg.attach(&pn->cfg, Graph::Edge::TREE);
+ pn->cfg.attach(&bb->cfg, Graph::Edge::FORWARD);
+
+ assert(pb->getExit()->op != OP_CALL);
+ if (pb->getExit()->asFlow()->target.bb == bb)
+ pb->getExit()->asFlow()->target.bb = pn;
+ }
+
+ // insert MOVs (phi->src(j) should stem from j-th in-BB)
+ int j = 0;
+ for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
+ pb = BasicBlock::get(ei.getNode());
+ if (!pb->isTerminated())
+ pb->insertTail(new_FlowInstruction(func, OP_BRA, bb));
+
+ for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
+ mov = new_Instruction(func, OP_MOV, TYPE_U32);
+
+ mov->setSrc(0, phi->getSrc(j));
+ mov->setDef(0, new_LValue(func, phi->getDef(0)->asLValue()));
+ phi->setSrc(j, mov->getDef(0));
+
+ pb->insertBefore(pb->getExit(), mov);
+ }
+ ++j;
+ }
+
+ return true;
+}
+
+bool
+RegAlloc::ArgumentMovesPass::visit(BasicBlock *bb)
+{
+ // Bind function call inputs/outputs to the same physical register
+ // the callee uses, inserting moves where needed in case a conflict
+ // arises.
+ for (Instruction *i = bb->getEntry(); i; i = i->next) {
+ FlowInstruction *cal = i->asFlow();
+ // TODO: Handle indirect calls.
+ // Right now they should only be generated for builtins.
+ if (!cal || cal->op != OP_CALL || cal->builtin || cal->indirect)
+ continue;
+ RegisterSet clobberSet(prog->getTarget());
+
+ // Bind input values.
+ for (int s = cal->indirect ? 1 : 0; cal->srcExists(s); ++s) {
+ const int t = cal->indirect ? (s - 1) : s;
+ LValue *tmp = new_LValue(func, cal->getSrc(s)->asLValue());
+ tmp->reg.data.id = cal->target.fn->ins[t].rep()->reg.data.id;
+
+ Instruction *mov =
+ new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size));
+ mov->setDef(0, tmp);
+ mov->setSrc(0, cal->getSrc(s));
+ cal->setSrc(s, tmp);
+
+ bb->insertBefore(cal, mov);
+ }
+
+ // Bind output values.
+ for (int d = 0; cal->defExists(d); ++d) {
+ LValue *tmp = new_LValue(func, cal->getDef(d)->asLValue());
+ tmp->reg.data.id = cal->target.fn->outs[d].rep()->reg.data.id;
+
+ Instruction *mov =
+ new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size));
+ mov->setSrc(0, tmp);
+ mov->setDef(0, cal->getDef(d));
+ cal->setDef(d, tmp);
+
+ bb->insertAfter(cal, mov);
+ clobberSet.occupy(tmp);
+ }
+
+ // Bind clobbered values.
+ for (std::deque<Value *>::iterator it = cal->target.fn->clobbers.begin();
+ it != cal->target.fn->clobbers.end();
+ ++it) {
+ if (clobberSet.testOccupy(*it)) {
+ Value *tmp = new_LValue(func, (*it)->asLValue());
+ tmp->reg.data.id = (*it)->reg.data.id;
+ cal->setDef(cal->defCount(), tmp);
+ }
+ }
+ }
+
+ // Update the clobber set of the function.
+ if (BasicBlock::get(func->cfgExit) == bb) {
+ func->buildDefSets();
+ for (unsigned int i = 0; i < bb->defSet.getSize(); ++i)
+ if (bb->defSet.test(i))
+ func->clobbers.push_back(func->getLValue(i));
+ }
+
+ return true;
+}
+
+// Build the set of live-in variables of bb.
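+// Standard backward data-flow: liveIn(bb) = (liveOut(bb) \ defs(bb)) | uses(bb),
+// where liveOut(bb) is the union of liveIn over the CFG successors of bb.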
+bool
+RegAlloc::buildLiveSets(BasicBlock *bb)
+{
+ Function *f = bb->getFunction();
+ BasicBlock *bn;
+ Instruction *i;
+ unsigned int s, d;
+
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "buildLiveSets(BB:%i)\n", bb->getId());
+
+ bb->liveSet.allocate(func->allLValues.getSize(), false);
+
+ int n = 0;
+ for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
+ bn = BasicBlock::get(ei.getNode());
+ if (bn == bb)
+ continue;
+ if (bn->cfg.visit(sequence))
+ if (!buildLiveSets(bn))
+ return false;
+ if (n++ || bb->liveSet.marker)
+ bb->liveSet |= bn->liveSet;
+ else
+ bb->liveSet = bn->liveSet;
+ }
+ if (!n && !bb->liveSet.marker)
+ bb->liveSet.fill(0);
+ bb->liveSet.marker = true;
+
+ if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
+ INFO("BB:%i live set of out blocks:\n", bb->getId());
+ bb->liveSet.print();
+ }
+
+ // if (!bb->getEntry())
+ // return true;
+
+ if (bb == BasicBlock::get(f->cfgExit)) {
+ for (std::deque<ValueRef>::iterator it = f->outs.begin();
+ it != f->outs.end(); ++it) {
+ assert(it->get()->asLValue());
+ bb->liveSet.set(it->get()->id);
+ }
+ }
+
+ for (i = bb->getExit(); i && i != bb->getEntry()->prev; i = i->prev) {
+ for (d = 0; i->defExists(d); ++d)
+ bb->liveSet.clr(i->getDef(d)->id);
+ for (s = 0; i->srcExists(s); ++s)
+ if (i->getSrc(s)->asLValue())
+ bb->liveSet.set(i->getSrc(s)->id);
+ }
+ for (i = bb->getPhi(); i && i->op == OP_PHI; i = i->next)
+ bb->liveSet.clr(i->getDef(0)->id);
+
+ if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
+ INFO("BB:%i live set after propagation:\n", bb->getId());
+ bb->liveSet.print();
+ }
+
+ return true;
+}
+
+void
+RegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock *bb)
+{
+ BasicBlock *bbA = NULL, *bbB = NULL;
+
+ if (bb->cfg.outgoingCount()) {
+ // trickery to save a loop of OR'ing liveSets
+ // aliasing works fine with BitSet::setOr
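+ // bbA/bbB act as a two-stage pipeline: the first setOr initializes
+ // bb->liveSet from two successors, later ones are OR'ed in one edge
+ // behind, and the setOr after the loop flushes the last successor.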
+ for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
+ if (ei.getType() == Graph::Edge::DUMMY)
+ continue;
+ if (bbA) {
+ bb->liveSet.setOr(&bbA->liveSet, &bbB->liveSet);
+ bbA = bb;
+ } else {
+ bbA = bbB;
+ }
+ bbB = BasicBlock::get(ei.getNode());
+ }
+ bb->liveSet.setOr(&bbB->liveSet, bbA ? &bbA->liveSet : NULL);
+ } else
+ if (bb->cfg.incidentCount()) {
+ bb->liveSet.fill(0);
+ }
+}
+
+bool
+RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb)
+{
+ collectLiveValues(bb);
+
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "BuildIntervals(BB:%i)\n", bb->getId());
+
+ // go through the out blocks and remove, from the live set, phi sources
+ // that do not originate in the current block
+ for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
+ BasicBlock *out = BasicBlock::get(ei.getNode());
+
+ for (Instruction *i = out->getPhi(); i && i->op == OP_PHI; i = i->next) {
+ bb->liveSet.clr(i->getDef(0)->id);
+
+ for (int s = 0; i->srcExists(s); ++s) {
+ assert(i->src(s).getInsn());
+ if (i->getSrc(s)->getUniqueInsn()->bb == bb) // XXX: reachableBy ?
+ bb->liveSet.set(i->getSrc(s)->id);
+ else
+ bb->liveSet.clr(i->getSrc(s)->id);
+ }
+ }
+ }
+
+ // remaining live-outs are live until end
+ if (bb->getExit()) {
+ for (unsigned int j = 0; j < bb->liveSet.getSize(); ++j)
+ if (bb->liveSet.test(j))
+ addLiveRange(func->getLValue(j), bb, bb->getExit()->serial + 1);
+ }
+
+ for (Instruction *i = bb->getExit(); i && i->op != OP_PHI; i = i->prev) {
+ for (int d = 0; i->defExists(d); ++d) {
+ bb->liveSet.clr(i->getDef(d)->id);
+ if (i->getDef(d)->reg.data.id >= 0) // add hazard for fixed regs
+ i->getDef(d)->livei.extend(i->serial, i->serial);
+ }
+
+ for (int s = 0; i->srcExists(s); ++s) {
+ if (!i->getSrc(s)->asLValue())
+ continue;
+ if (!bb->liveSet.test(i->getSrc(s)->id)) {
+ bb->liveSet.set(i->getSrc(s)->id);
+ addLiveRange(i->getSrc(s), bb, i->serial);
+ }
+ }
+ }
+
+ if (bb == BasicBlock::get(func->cfg.getRoot())) {
+ for (std::deque<ValueDef>::iterator it = func->ins.begin();
+ it != func->ins.end(); ++it) {
+ if (it->get()->reg.data.id >= 0) // add hazard for fixed regs
+ it->get()->livei.extend(0, 1);
+ }
+ }
+
+ return true;
+}
+
+#define JOIN_MASK_PHI (1 << 0)
+#define JOIN_MASK_UNION (1 << 1)
+#define JOIN_MASK_MOV (1 << 2)
+#define JOIN_MASK_TEX (1 << 3)
+
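+// Graph-colouring register allocator in the Chaitin/Briggs style: build the
+// register interference graph (RIG) from live intervals, coalesce
+// copy-related values, compute spill weights, simplify (push trivially
+// colourable nodes on a stack), then pop and select registers, inserting
+// spill code on failure.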
+class GCRA
+{
+public:
+ GCRA(Function *, SpillCodeInserter&);
+ ~GCRA();
+
+ bool allocateRegisters(ArrayList& insns);
+
+ void printNodeInfo() const;
+
+private:
+ class RIG_Node : public Graph::Node
+ {
+ public:
+ RIG_Node();
+
+ void init(const RegisterSet&, LValue *);
+
+ void addInterference(RIG_Node *);
+ void addRegPreference(RIG_Node *);
+
+ inline LValue *getValue() const
+ {
+ return reinterpret_cast<LValue *>(data);
+ }
+ inline void setValue(LValue *lval) { data = lval; }
+
+ inline uint8_t getCompMask() const
+ {
+ return ((1 << colors) - 1) << (reg & 7);
+ }
+
+ static inline RIG_Node *get(const Graph::EdgeIterator& ei)
+ {
+ return static_cast<RIG_Node *>(ei.getNode());
+ }
+
+ public:
+ uint32_t degree;
+ uint16_t degreeLimit; // if deg < degLimit, node is trivially colourable
+ uint16_t colors;
+
+ DataFile f;
+ int32_t reg;
+
+ float weight;
+
+ // list pointers for simplify() phase
+ RIG_Node *next;
+ RIG_Node *prev;
+
+ // union of the live intervals of all coalesced values (we want to retain
+ // the separate intervals for testing interference of compound values)
+ Interval livei;
+
+ std::list<RIG_Node *> prefRegs;
+ };
+
+private:
+ inline RIG_Node *getNode(const LValue *v) const { return &nodes[v->id]; }
+
+ void buildRIG(ArrayList&);
+ bool coalesce(ArrayList&);
+ bool doCoalesce(ArrayList&, unsigned int mask);
+ void calculateSpillWeights();
+ void simplify();
+ bool selectRegisters();
+ void cleanup(const bool success);
+
+ void simplifyEdge(RIG_Node *, RIG_Node *);
+ void simplifyNode(RIG_Node *);
+
+ bool coalesceValues(Value *, Value *, bool force);
+ void resolveSplitsAndMerges();
+ void makeCompound(Instruction *, bool isSplit);
+
+ inline void checkInterference(const RIG_Node *, Graph::EdgeIterator&);
+
+ inline void insertOrderedTail(std::list<RIG_Node *>&, RIG_Node *);
+ void checkList(std::list<RIG_Node *>&);
+
+private:
+ std::stack<uint32_t> stack;
+
+ // list headers for simplify() phase
+ RIG_Node lo[2];
+ RIG_Node hi;
+
+ Graph RIG;
+ RIG_Node *nodes;
+ unsigned int nodeCount;
+
+ Function *func;
+ Program *prog;
+
+ static uint8_t relDegree[17][17];
+
+ RegisterSet regs;
+
+ // need to fixup register id for participants of OP_MERGE/SPLIT
+ std::list<Instruction *> merges;
+ std::list<Instruction *> splits;
+
+ SpillCodeInserter& spill;
+ std::list<ValuePair> mustSpill;
+};
+
+uint8_t GCRA::relDegree[17][17];
+
+GCRA::RIG_Node::RIG_Node() : Node(NULL), next(this), prev(this)
+{
+ colors = 0;
+}
+
+void
+GCRA::printNodeInfo() const
+{
+ for (unsigned int i = 0; i < nodeCount; ++i) {
+ if (!nodes[i].colors)
+ continue;
+ INFO("RIG_Node[%%%i]($[%u]%i): %u colors, weight %f, deg %u/%u\n X",
+ i,
+ nodes[i].f, nodes[i].reg, nodes[i].colors,
+ nodes[i].weight,
+ nodes[i].degree, nodes[i].degreeLimit);
+
+ for (Graph::EdgeIterator ei = nodes[i].outgoing(); !ei.end(); ei.next())
+ INFO(" %%%i", RIG_Node::get(ei)->getValue()->id);
+ for (Graph::EdgeIterator ei = nodes[i].incident(); !ei.end(); ei.next())
+ INFO(" %%%i", RIG_Node::get(ei)->getValue()->id);
+ INFO("\n");
+ }
+}
+
+void
+GCRA::RIG_Node::init(const RegisterSet& regs, LValue *lval)
+{
+ setValue(lval);
+ if (lval->reg.data.id >= 0)
+ lval->noSpill = lval->fixedReg = 1;
+
+ colors = regs.units(lval->reg.file, lval->reg.size);
+ f = lval->reg.file;
+ reg = -1;
+ if (lval->reg.data.id >= 0)
+ reg = regs.idToUnits(lval);
+
+ weight = std::numeric_limits<float>::infinity();
+ degree = 0;
+ degreeLimit = regs.getFileSize(f, lval->reg.size);
+
+ livei.insert(lval->livei);
+}
+
+bool
+GCRA::coalesceValues(Value *dst, Value *src, bool force)
+{
+ LValue *rep = dst->join->asLValue();
+ LValue *val = src->join->asLValue();
+
+ if (!force && val->reg.data.id >= 0) {
+ rep = src->join->asLValue();
+ val = dst->join->asLValue();
+ }
+ RIG_Node *nRep = &nodes[rep->id];
+ RIG_Node *nVal = &nodes[val->id];
+
+ if (src->reg.file != dst->reg.file) {
+ if (!force)
+ return false;
+ WARN("forced coalescing of values in different files !\n");
+ }
+ if (!force && dst->reg.size != src->reg.size)
+ return false;
+
+ if ((rep->reg.data.id >= 0) && (rep->reg.data.id != val->reg.data.id)) {
+ if (force) {
+ if (val->reg.data.id >= 0)
+ WARN("forced coalescing of values in different fixed regs !\n");
+ } else {
+ if (val->reg.data.id >= 0)
+ return false;
+ // make sure that there is no overlap with the fixed register of rep
+ for (ArrayList::Iterator it = func->allLValues.iterator();
+ !it.end(); it.next()) {
+ Value *reg = reinterpret_cast<Value *>(it.get())->asLValue();
+ assert(reg);
+ if (reg->interfers(rep) && reg->livei.overlaps(nVal->livei))
+ return false;
+ }
+ }
+ }
+
+ if (!force && nRep->livei.overlaps(nVal->livei))
+ return false;
+
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "joining %%%i($%i) <- %%%i\n",
+ rep->id, rep->reg.data.id, val->id);
+
+ // set join pointer of all values joined with val
+ for (Value::DefIterator def = val->defs.begin(); def != val->defs.end();
+ ++def)
+ (*def)->get()->join = rep;
+ assert(rep->join == rep && val->join == rep);
+
+ // add val's definitions to rep and extend the live interval of its RIG node
+ rep->defs.insert(rep->defs.end(), val->defs.begin(), val->defs.end());
+ nRep->livei.unify(nVal->livei);
+ return true;
+}
+
+bool
+GCRA::coalesce(ArrayList& insns)
+{
+ bool ret = doCoalesce(insns, JOIN_MASK_PHI);
+ if (!ret)
+ return false;
+ switch (func->getProgram()->getTarget()->getChipset() & ~0xf) {
+ case 0x50:
+ case 0x80:
+ case 0x90:
+ case 0xa0:
+ ret = doCoalesce(insns, JOIN_MASK_UNION | JOIN_MASK_TEX);
+ break;
+ case 0xc0:
+ case 0xd0:
+ case 0xe0:
+ ret = doCoalesce(insns, JOIN_MASK_UNION);
+ break;
+ default:
+ break;
+ }
+ if (!ret)
+ return false;
+ return doCoalesce(insns, JOIN_MASK_MOV);
+}
+
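+// Mask of the sub-register positions a component of a compound value may
+// occupy; e.g. makeCompMask(2, 0, 1) == 0x55 (any even unit) and
+// makeCompMask(2, 1, 1) == 0xaa (any odd unit).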
+static inline uint8_t makeCompMask(int compSize, int base, int size)
+{
+ uint8_t m = ((1 << size) - 1) << base;
+
+ switch (compSize) {
+ case 1:
+ return 0xff;
+ case 2:
+ m |= (m << 2);
+ return (m << 4) | m;
+ case 3:
+ case 4:
+ return (m << 4) | m;
+ default:
+ assert(compSize <= 8);
+ return m;
+ }
+}
+
+// Used when coalescing moves. The non-compound value will become one, e.g.:
+// mov b32 $r0 $r2 / merge b64 $r0d { $r0 $r1 }
+// split b64 { $r0 $r1 } $r0d / mov b64 $r0d $r2d
+static inline void copyCompound(Value *dst, Value *src)
+{
+ LValue *ldst = dst->asLValue();
+ LValue *lsrc = src->asLValue();
+
+ if (ldst->compound && !lsrc->compound) {
+ LValue *swap = lsrc;
+ lsrc = ldst;
+ ldst = swap;
+ }
+
+ ldst->compound = lsrc->compound;
+ ldst->compMask = lsrc->compMask;
+}
+
+void
+GCRA::makeCompound(Instruction *insn, bool split)
+{
+ LValue *rep = (split ? insn->getSrc(0) : insn->getDef(0))->asLValue();
+
+ if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
+ INFO("makeCompound(split = %i): ", split);
+ insn->print();
+ }
+
+ const unsigned int size = getNode(rep)->colors;
+ unsigned int base = 0;
+
+ if (!rep->compound)
+ rep->compMask = 0xff;
+ rep->compound = 1;
+
+ for (int c = 0; split ? insn->defExists(c) : insn->srcExists(c); ++c) {
+ LValue *val = (split ? insn->getDef(c) : insn->getSrc(c))->asLValue();
+
+ val->compound = 1;
+ if (!val->compMask)
+ val->compMask = 0xff;
+ val->compMask &= makeCompMask(size, base, getNode(val)->colors);
+ assert(val->compMask);
+
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "compound: %%%i:%02x <- %%%i:%02x\n",
+ rep->id, rep->compMask, val->id, val->compMask);
+
+ base += getNode(val)->colors;
+ }
+ assert(base == size);
+}
+
+bool
+GCRA::doCoalesce(ArrayList& insns, unsigned int mask)
+{
+ int c, n;
+
+ for (n = 0; n < insns.getSize(); ++n) {
+ Instruction *i;
+ Instruction *insn = reinterpret_cast<Instruction *>(insns.get(n));
+
+ switch (insn->op) {
+ case OP_PHI:
+ if (!(mask & JOIN_MASK_PHI))
+ break;
+ for (c = 0; insn->srcExists(c); ++c)
+ if (!coalesceValues(insn->getDef(0), insn->getSrc(c), false)) {
+ // this is bad
+ ERROR("failed to coalesce phi operands\n");
+ return false;
+ }
+ break;
+ case OP_UNION:
+ case OP_MERGE:
+ if (!(mask & JOIN_MASK_UNION))
+ break;
+ for (c = 0; insn->srcExists(c); ++c)
+ coalesceValues(insn->getDef(0), insn->getSrc(c), true);
+ if (insn->op == OP_MERGE) {
+ merges.push_back(insn);
+ if (insn->srcExists(1))
+ makeCompound(insn, false);
+ }
+ break;
+ case OP_SPLIT:
+ if (!(mask & JOIN_MASK_UNION))
+ break;
+ splits.push_back(insn);
+ for (c = 0; insn->defExists(c); ++c)
+ coalesceValues(insn->getSrc(0), insn->getDef(c), true);
+ makeCompound(insn, true);
+ break;
+ case OP_MOV:
+ if (!(mask & JOIN_MASK_MOV))
+ break;
+ i = NULL;
+ if (!insn->getDef(0)->uses.empty())
+ i = insn->getDef(0)->uses.front()->getInsn();
+ // if this is a constraint-move there will only be a single use
+ if (i && i->op == OP_MERGE) // do we really still need this ?
+ break;
+ i = insn->getSrc(0)->getUniqueInsn();
+ if (i && !i->constrainedDefs()) {
+ if (coalesceValues(insn->getDef(0), insn->getSrc(0), false))
+ copyCompound(insn->getSrc(0), insn->getDef(0));
+ }
+ break;
+ case OP_TEX:
+ case OP_TXB:
+ case OP_TXL:
+ case OP_TXF:
+ case OP_TXQ:
+ case OP_TXD:
+ case OP_TXG:
+ case OP_TEXCSAA:
+ if (!(mask & JOIN_MASK_TEX))
+ break;
+ for (c = 0; insn->srcExists(c) && c != insn->predSrc; ++c)
+ coalesceValues(insn->getDef(c), insn->getSrc(c), true);
+ break;
+ default:
+ break;
+ }
+ }
+ return true;
+}
+
+void
+GCRA::RIG_Node::addInterference(RIG_Node *node)
+{
+ this->degree += relDegree[node->colors][colors];
+ node->degree += relDegree[colors][node->colors];
+
+ this->attach(node, Graph::Edge::CROSS);
+}
+
+void
+GCRA::RIG_Node::addRegPreference(RIG_Node *node)
+{
+ prefRegs.push_back(node);
+}
+
+GCRA::GCRA(Function *fn, SpillCodeInserter& spill) :
+ nodes(NULL), // make the delete[] in ~GCRA safe if allocation never ran
+ nodeCount(0),
+ func(fn),
+ regs(fn->getProgram()->getTarget()),
+ spill(spill)
+{
+ prog = func->getProgram();
+
+ // initialize relative degrees array - i takes away from j
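+ // relDegree[i][j] = j * ceil(i / j), i.e. i rounded up to a multiple of
+ // j: an i-unit neighbour blocks ceil(i / j) aligned j-unit slots,
+ // e.g. relDegree[3][2] == 4 and relDegree[1][4] == 4.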
+ for (int i = 1; i <= 16; ++i)
+ for (int j = 1; j <= 16; ++j)
+ relDegree[i][j] = j * ((i + j - 1) / j);
+}
+
+GCRA::~GCRA()
+{
+ if (nodes)
+ delete[] nodes;
+}
+
+void
+GCRA::checkList(std::list<RIG_Node *>& lst)
+{
+ GCRA::RIG_Node *prev = NULL;
+
+ for (std::list<RIG_Node *>::iterator it = lst.begin();
+ it != lst.end();
+ ++it) {
+ assert((*it)->getValue()->join == (*it)->getValue());
+ if (prev)
+ assert(prev->livei.begin() <= (*it)->livei.begin());
+ prev = *it;
+ }
+}
+
+void
+GCRA::insertOrderedTail(std::list<RIG_Node *>& list, RIG_Node *node)
+{
+ if (node->livei.isEmpty())
+ return;
+ // only the intervals of joined values may arrive out of order
+ std::list<RIG_Node *>::iterator prev, it;
+ for (it = list.end(); it != list.begin(); it = prev) {
+ prev = it;
+ --prev;
+ if ((*prev)->livei.begin() <= node->livei.begin())
+ break;
+ }
+ list.insert(it, node);
+}
+
+void
+GCRA::buildRIG(ArrayList& insns)
+{
+ std::list<RIG_Node *> values, active;
+
+ for (std::deque<ValueDef>::iterator it = func->ins.begin();
+ it != func->ins.end(); ++it)
+ insertOrderedTail(values, getNode(it->get()->asLValue()));
+
+ for (int i = 0; i < insns.getSize(); ++i) {
+ Instruction *insn = reinterpret_cast<Instruction *>(insns.get(i));
+ for (int d = 0; insn->defExists(d); ++d)
+ if (insn->getDef(d)->rep() == insn->getDef(d))
+ insertOrderedTail(values, getNode(insn->getDef(d)->asLValue()));
+ }
+ checkList(values);
+
+ while (!values.empty()) {
+ RIG_Node *cur = values.front();
+
+ for (std::list<RIG_Node *>::iterator it = active.begin();
+ it != active.end();) {
+ RIG_Node *node = *it;
+
+ if (node->livei.end() <= cur->livei.begin()) {
+ it = active.erase(it);
+ } else {
+ if (node->f == cur->f && node->livei.overlaps(cur->livei))
+ cur->addInterference(node);
+ ++it;
+ }
+ }
+ values.pop_front();
+ active.push_back(cur);
+ }
+}
+
+void
+GCRA::calculateSpillWeights()
+{
+ for (unsigned int i = 0; i < nodeCount; ++i) {
+ RIG_Node *const n = &nodes[i];
+ if (!nodes[i].colors || nodes[i].livei.isEmpty())
+ continue;
+ if (nodes[i].reg >= 0) {
+ // update max reg
+ regs.occupy(n->f, n->reg, n->colors);
+ continue;
+ }
+ LValue *val = nodes[i].getValue();
+
+ if (!val->noSpill) {
+ int rc = 0;
+ for (Value::DefIterator it = val->defs.begin();
+ it != val->defs.end();
+ ++it)
+ rc += (*it)->get()->refCount();
+
+ nodes[i].weight =
+ (float)rc * (float)rc / (float)nodes[i].livei.extent();
+ }
+
+ if (nodes[i].degree < nodes[i].degreeLimit) {
+ int l = 0;
+ if (val->reg.size > 4)
+ l = 1;
+ DLLIST_ADDHEAD(&lo[l], &nodes[i]);
+ } else {
+ DLLIST_ADDHEAD(&hi, &nodes[i]);
+ }
+ }
+ if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
+ printNodeInfo();
+}
+
+void
+GCRA::simplifyEdge(RIG_Node *a, RIG_Node *b)
+{
+ bool move = b->degree >= b->degreeLimit;
+
+ INFO_DBG(prog->dbgFlags, REG_ALLOC,
+ "edge: (%%%i, deg %u/%u) >-< (%%%i, deg %u/%u)\n",
+ a->getValue()->id, a->degree, a->degreeLimit,
+ b->getValue()->id, b->degree, b->degreeLimit);
+
+ b->degree -= relDegree[a->colors][b->colors];
+
+ move = move && b->degree < b->degreeLimit;
+ if (move && !DLLIST_EMPTY(b)) {
+ int l = (b->getValue()->reg.size > 4) ? 1 : 0;
+ DLLIST_DEL(b);
+ DLLIST_ADDTAIL(&lo[l], b);
+ }
+}
+
+void
+GCRA::simplifyNode(RIG_Node *node)
+{
+ for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next())
+ simplifyEdge(node, RIG_Node::get(ei));
+
+ for (Graph::EdgeIterator ei = node->incident(); !ei.end(); ei.next())
+ simplifyEdge(node, RIG_Node::get(ei));
+
+ DLLIST_DEL(node);
+ stack.push(node->getValue()->id);
+
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "SIMPLIFY: pushed %%%i%s\n",
+ node->getValue()->id,
+ (node->degree < node->degreeLimit) ? "" : "(spill)");
+}
+
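+// Briggs-style optimistic simplification: nodes are pushed even when their
+// degree is not below the limit (logged as "(spill)" above); whether they
+// really spill is decided in the select phase, where a colour may still
+// turn out to be free.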
+void
+GCRA::simplify()
+{
+ for (;;) {
+ if (!DLLIST_EMPTY(&lo[0])) {
+ do {
+ simplifyNode(lo[0].next);
+ } while (!DLLIST_EMPTY(&lo[0]));
+ } else
+ if (!DLLIST_EMPTY(&lo[1])) {
+ simplifyNode(lo[1].next);
+ } else
+ if (!DLLIST_EMPTY(&hi)) {
+ RIG_Node *best = hi.next;
+ float bestScore = best->weight / (float)best->degree;
+ // spill candidate
+ for (RIG_Node *it = best->next; it != &hi; it = it->next) {
+ float score = it->weight / (float)it->degree;
+ if (score < bestScore) {
+ best = it;
+ bestScore = score;
+ }
+ }
+ if (isinf(bestScore)) {
+ ERROR("no viable spill candidates left\n");
+ break;
+ }
+ simplifyNode(best);
+ } else {
+ break;
+ }
+ }
+}
+
+void
+GCRA::checkInterference(const RIG_Node *node, Graph::EdgeIterator& ei)
+{
+ const RIG_Node *intf = RIG_Node::get(ei);
+
+ if (intf->reg < 0)
+ return;
+ const LValue *vA = node->getValue();
+ const LValue *vB = intf->getValue();
+
+ const uint8_t intfMask = ((1 << intf->colors) - 1) << (intf->reg & 7);
+
+ if (vA->compound | vB->compound) {
+ // NOTE: this only works for >aligned< register tuples !
+ for (Value::DefCIterator D = vA->defs.begin(); D != vA->defs.end(); ++D) {
+ for (Value::DefCIterator d = vB->defs.begin(); d != vB->defs.end(); ++d) {
+ const LValue *vD = (*D)->get()->asLValue();
+ const LValue *vd = (*d)->get()->asLValue();
+
+ if (!vD->livei.overlaps(vd->livei)) {
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "(%%%i) X (%%%i): no overlap\n",
+ vD->id, vd->id);
+ continue;
+ }
+
+ uint8_t mask = vD->compound ? vD->compMask : ~0;
+ if (vd->compound) {
+ assert(vB->compound);
+ mask &= vd->compMask & vB->compMask;
+ } else {
+ mask &= intfMask;
+ }
+
+ INFO_DBG(prog->dbgFlags, REG_ALLOC,
+ "(%%%i)%02x X (%%%i)%02x & %02x: $r%i.%02x\n",
+ vD->id,
+ vD->compound ? vD->compMask : 0xff,
+ vd->id,
+ vd->compound ? vd->compMask : intfMask,
+ vB->compMask, intf->reg & ~7, mask);
+ if (mask)
+ regs.occupyMask(node->f, intf->reg & ~7, mask);
+ }
+ }
+ } else {
+ INFO_DBG(prog->dbgFlags, REG_ALLOC,
+ "(%%%i) X (%%%i): $r%i + %u\n",
+ vA->id, vB->id, intf->reg, intf->colors);
+ regs.occupy(node->f, intf->reg, intf->colors);
+ }
+}
+
+bool
+GCRA::selectRegisters()
+{
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "\nSELECT phase\n");
+
+ while (!stack.empty()) {
+ RIG_Node *node = &nodes[stack.top()];
+ stack.pop();
+
+ regs.reset(node->f);
+
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "\nNODE[%%%i, %u colors]\n",
+ node->getValue()->id, node->colors);
+
+ for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next())
+ checkInterference(node, ei);
+ for (Graph::EdgeIterator ei = node->incident(); !ei.end(); ei.next())
+ checkInterference(node, ei);
+
+ if (!node->prefRegs.empty()) {
+ for (std::list<RIG_Node *>::const_iterator it = node->prefRegs.begin();
+ it != node->prefRegs.end();
+ ++it) {
+ if ((*it)->reg >= 0 &&
+ regs.testOccupy(node->f, (*it)->reg, node->colors)) {
+ node->reg = (*it)->reg;
+ break;
+ }
+ }
+ }
+ if (node->reg >= 0)
+ continue;
+ LValue *lval = node->getValue();
+ if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
+ regs.print();
+ bool ret = regs.assign(node->reg, node->f, node->colors);
+ if (ret) {
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "assigned reg %i\n", node->reg);
+ lval->compMask = node->getCompMask();
+ } else {
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "must spill: %%%i (size %u)\n",
+ lval->id, lval->reg.size);
+ Symbol *slot = NULL;
+ if (lval->reg.file == FILE_GPR)
+ slot = spill.assignSlot(node->livei, lval->reg.size);
+ mustSpill.push_back(ValuePair(lval, slot));
+ }
+ }
+ if (!mustSpill.empty())
+ return false;
+ for (unsigned int i = 0; i < nodeCount; ++i) {
+ LValue *lval = nodes[i].getValue();
+ if (nodes[i].reg >= 0 && nodes[i].colors > 0)
+ lval->reg.data.id =
+ regs.unitsToId(nodes[i].f, nodes[i].reg, lval->reg.size);
+ }
+ return true;
+}
+
+bool
+GCRA::allocateRegisters(ArrayList& insns)
+{
+ bool ret;
+
+ INFO_DBG(prog->dbgFlags, REG_ALLOC,
+ "allocateRegisters to %u instructions\n", insns.getSize());
+
+ nodeCount = func->allLValues.getSize();
+ nodes = new RIG_Node[nodeCount];
+ if (!nodes)
+ return false;
+ for (unsigned int i = 0; i < nodeCount; ++i) {
+ LValue *lval = reinterpret_cast<LValue *>(func->allLValues.get(i));
+ if (lval) {
+ nodes[i].init(regs, lval);
+ RIG.insert(&nodes[i]);
+ }
+ }
+
+ // coalesce first, we use only 1 RIG node for a group of joined values
+ ret = coalesce(insns);
+ if (!ret)
+ goto out;
+
+ if (func->getProgram()->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
+ func->printLiveIntervals();
+
+ buildRIG(insns);
+ calculateSpillWeights();
+ simplify();
+
+ ret = selectRegisters();
+ if (!ret) {
+ INFO_DBG(prog->dbgFlags, REG_ALLOC,
+ "selectRegisters failed, inserting spill code ...\n");
+ regs.reset(FILE_GPR, true);
+ spill.run(mustSpill);
+ if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
+ func->print();
+ } else {
+ prog->maxGPR = std::max(prog->maxGPR, regs.getMaxAssigned(FILE_GPR));
+ }
+
+out:
+ cleanup(ret);
+ return ret;
+}
+
+void
+GCRA::cleanup(const bool success)
+{
+ mustSpill.clear();
+
+ for (ArrayList::Iterator it = func->allLValues.iterator();
+ !it.end(); it.next()) {
+ LValue *lval = reinterpret_cast<LValue *>(it.get());
+
+ lval->livei.clear();
+
+ lval->compound = 0;
+ lval->compMask = 0;
+
+ if (lval->join == lval)
+ continue;
+
+ if (success) {
+ lval->reg.data.id = lval->join->reg.data.id;
+ } else {
+ for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end();
+ ++d)
+ lval->join->defs.remove(*d);
+ lval->join = lval;
+ }
+ }
+
+ if (success)
+ resolveSplitsAndMerges();
+ splits.clear(); // avoid duplicate entries on next coalesce pass
+ merges.clear();
+
+ delete[] nodes;
+ nodes = NULL;
+}
+
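+// First-fit spill-slot assignment: try to reuse an existing slot of the
+// same size whose recorded live intervals do not overlap livei; otherwise
+// grow the stack, aligned to the slot size.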
+Symbol *
+SpillCodeInserter::assignSlot(const Interval &livei, const unsigned int size)
+{
+ SpillSlot slot;
+ int32_t offsetBase = stackSize;
+ int32_t offset;
+ std::list<SpillSlot>::iterator pos = slots.end(), it = slots.begin();
+
+ if (offsetBase % size)
+ offsetBase += size - (offsetBase % size);
+
+ slot.sym = NULL;
+
+ for (offset = offsetBase; offset < stackSize; offset += size) {
+ const int32_t entryEnd = offset + size;
+ while (it != slots.end() && it->offset < offset)
+ ++it;
+ if (it == slots.end()) // no slots left
+ break;
+ std::list<SpillSlot>::iterator bgn = it;
+
+ while (it != slots.end() && it->offset < entryEnd) {
+ it->occup.print();
+ if (it->occup.overlaps(livei))
+ break;
+ ++it;
+ }
+ if (it == slots.end() || it->offset >= entryEnd) {
+ // fits
+ for (; bgn != slots.end() && bgn->offset < entryEnd; ++bgn) {
+ bgn->occup.insert(livei);
+ if (bgn->size() == size)
+ slot.sym = bgn->sym;
+ }
+ break;
+ }
+ }
+ if (!slot.sym) {
+ stackSize = offset + size;
+ slot.offset = offset;
+ slot.sym = new_Symbol(func->getProgram(), FILE_MEMORY_LOCAL);
+ if (!func->stackPtr)
+ offset += func->tlsBase;
+ slot.sym->setAddress(NULL, offset);
+ slot.sym->reg.size = size;
+ slots.insert(pos, slot)->occup.insert(livei);
+ }
+ return slot.sym;
+}
+
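+// Insert the store (or, for non-memory slots, a register-file move via
+// OP_CVT) that writes the spilled value to its slot right after each
+// definition.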
+void
+SpillCodeInserter::spill(Instruction *defi, Value *slot, LValue *lval)
+{
+ const DataType ty = typeOfSize(slot->reg.size);
+
+ Instruction *st;
+ if (slot->reg.file == FILE_MEMORY_LOCAL) {
+ st = new_Instruction(func, OP_STORE, ty);
+ st->setSrc(0, slot);
+ st->setSrc(1, lval);
+ lval->noSpill = 1;
+ } else {
+ st = new_Instruction(func, OP_CVT, ty);
+ st->setDef(0, slot);
+ st->setSrc(0, lval);
+ }
+ defi->bb->insertAfter(defi, st);
+}
+
+LValue *
+SpillCodeInserter::unspill(Instruction *usei, LValue *lval, Value *slot)
+{
+ const DataType ty = typeOfSize(slot->reg.size);
+
+ lval = cloneShallow(func, lval);
+
+ Instruction *ld;
+ if (slot->reg.file == FILE_MEMORY_LOCAL) {
+ lval->noSpill = 1;
+ ld = new_Instruction(func, OP_LOAD, ty);
+ } else {
+ ld = new_Instruction(func, OP_CVT, ty);
+ }
+ ld->setDef(0, lval);
+ ld->setSrc(0, slot);
+
+ usei->bb->insertBefore(usei, ld);
+ return lval;
+}
+
+bool
+SpillCodeInserter::run(const std::list<ValuePair>& lst)
+{
+ for (std::list<ValuePair>::const_iterator it = lst.begin(); it != lst.end();
+ ++it) {
+ LValue *lval = it->first->asLValue();
+ Symbol *mem = it->second ? it->second->asSym() : NULL;
+
+ for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end();
+ ++d) {
+ Value *slot = mem ?
+ static_cast<Value *>(mem) : new_LValue(func, FILE_GPR);
+ Value *tmp = NULL;
+ Instruction *last = NULL;
+
+ LValue *dval = (*d)->get()->asLValue();
+ Instruction *defi = (*d)->getInsn();
+
+ // handle uses first or they'll contain the spill stores
+ while (!dval->uses.empty()) {
+ ValueRef *u = dval->uses.front();
+ Instruction *usei = u->getInsn();
+ assert(usei);
+ if (usei->op == OP_PHI) {
+ tmp = (slot->reg.file == FILE_MEMORY_LOCAL) ? NULL : slot;
+ last = NULL;
+ } else
+ if (!last || usei != last->next) { // TODO: sort uses
+ tmp = unspill(usei, dval, slot);
+ last = usei;
+ }
+ u->set(tmp);
+ }
+
+ assert(defi);
+ if (defi->op == OP_PHI) {
+ d = lval->defs.erase(d);
+ --d;
+ if (slot->reg.file == FILE_MEMORY_LOCAL)
+ delete_Instruction(func->getProgram(), defi);
+ else
+ defi->setDef(0, slot);
+ } else {
+ spill(defi, slot, dval);
+ }
+ }
+ }
+
+ // TODO: We're not trying to reuse old slots in a potential next iteration.
+ // We have to update the slots' livei intervals to be able to do that.
+ stackBase = stackSize;
+ slots.clear();
+ return true;
+}
+
+bool
+RegAlloc::exec()
+{
+ for (IteratorRef it = prog->calls.iteratorDFS(false);
+ !it->end(); it->next()) {
+ func = Function::get(reinterpret_cast<Graph::Node *>(it->get()));
+
+ func->tlsBase = prog->tlsSize;
+ if (!execFunc())
+ return false;
+ prog->tlsSize += func->tlsSize;
+ }
+ return true;
+}
+
+bool
+RegAlloc::execFunc()
+{
+ InsertConstraintsPass insertConstr;
+ PhiMovesPass insertPhiMoves;
+ ArgumentMovesPass insertArgMoves;
+ BuildIntervalsPass buildIntervals;
+ SpillCodeInserter insertSpills(func);
+
+ GCRA gcra(func, insertSpills);
+
+ unsigned int i, retries;
+ bool ret;
+
+ if (!func->ins.empty()) {
+ // Insert a nop at the entry so inputs only used by the first instruction
+ // don't count as having an empty live range.
+ Instruction *nop = new_Instruction(func, OP_NOP, TYPE_NONE);
+ BasicBlock::get(func->cfg.getRoot())->insertHead(nop);
+ }
+
+ ret = insertConstr.exec(func);
+ if (!ret)
+ goto out;
+
+ ret = insertPhiMoves.run(func);
+ if (!ret)
+ goto out;
+
+ ret = insertArgMoves.run(func);
+ if (!ret)
+ goto out;
+
+ // TODO: need to fix up spill slot usage ranges to support > 1 retry
+ for (retries = 0; retries < 3; ++retries) {
+ if (retries && (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC))
+ INFO("Retry: %i\n", retries);
+ if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
+ func->print();
+
+ // spilling to registers may add live ranges, need to rebuild everything
+ ret = true;
+ for (sequence = func->cfg.nextSequence(), i = 0;
+ ret && i <= func->loopNestingBound;
+ sequence = func->cfg.nextSequence(), ++i)
+ ret = buildLiveSets(BasicBlock::get(func->cfg.getRoot()));
+ if (!ret)
+ break;
+ func->orderInstructions(this->insns);
+
+ ret = buildIntervals.run(func);
+ if (!ret)
+ break;
+ ret = gcra.allocateRegisters(insns);
+ if (ret)
+ break; // success
+ }
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "RegAlloc done: %i\n", ret);
+
+ func->tlsSize = insertSpills.getStackSize();
+out:
+ return ret;
+}
+
+// TODO: check if modifying Instruction::join here breaks anything
+void
+GCRA::resolveSplitsAndMerges()
+{
+ for (std::list<Instruction *>::iterator it = splits.begin();
+ it != splits.end();
+ ++it) {
+ Instruction *split = *it;
+ unsigned int reg = regs.idToBytes(split->getSrc(0));
+ for (int d = 0; split->defExists(d); ++d) {
+ Value *v = split->getDef(d);
+ v->reg.data.id = regs.bytesToId(v, reg);
+ v->join = v;
+ reg += v->reg.size;
+ }
+ }
+ splits.clear();
+
+ for (std::list<Instruction *>::iterator it = merges.begin();
+ it != merges.end();
+ ++it) {
+ Instruction *merge = *it;
+ unsigned int reg = regs.idToBytes(merge->getDef(0));
+ for (int s = 0; merge->srcExists(s); ++s) {
+ Value *v = merge->getSrc(s);
+ v->reg.data.id = regs.bytesToId(v, reg);
+ v->join = v;
+ reg += v->reg.size;
+ }
+ }
+ merges.clear();
+}
+
+bool Program::registerAllocation()
+{
+ RegAlloc ra(this);
+ return ra.exec();
+}
+
+bool
+RegAlloc::InsertConstraintsPass::exec(Function *ir)
+{
+ constrList.clear();
+
+ bool ret = run(ir, true, true);
+ if (ret)
+ ret = insertConstraintMoves();
+ return ret;
+}
+
+// TODO: make part of texture insn
+void
+RegAlloc::InsertConstraintsPass::textureMask(TexInstruction *tex)
+{
+ Value *def[4];
+ int c, k, d;
+ uint8_t mask = 0;
+
+ for (d = 0, k = 0, c = 0; c < 4; ++c) {
+ if (!(tex->tex.mask & (1 << c)))
+ continue;
+ if (tex->getDef(k)->refCount()) {
+ mask |= 1 << c;
+ def[d++] = tex->getDef(k);
+ }
+ ++k;
+ }
+ tex->tex.mask = mask;
+
+ for (c = 0; c < d; ++c)
+ tex->setDef(c, def[c]);
+ for (; c < 4; ++c)
+ tex->setDef(c, NULL);
+}
+
+bool
+RegAlloc::InsertConstraintsPass::detectConflict(Instruction *cst, int s)
+{
+ Value *v = cst->getSrc(s);
+
+ // current register allocation can't handle it if a value participates in
+ // multiple constraints
+ for (Value::UseIterator it = v->uses.begin(); it != v->uses.end(); ++it) {
+ if (cst != (*it)->getInsn())
+ return true;
+ }
+
+ // can start at s + 1 because detectConflict is called on all sources
+ for (int c = s + 1; cst->srcExists(c); ++c)
+ if (v == cst->getSrc(c))
+ return true;
+
+ Instruction *defi = v->getInsn();
+
+ return (!defi || defi->constrainedDefs());
+}
+
+void
+RegAlloc::InsertConstraintsPass::addConstraint(Instruction *i, int s, int n)
+{
+ Instruction *cst;
+ int d;
+
+ // first, look for an existing identical constraint op
+ for (std::list<Instruction *>::iterator it = constrList.begin();
+ it != constrList.end();
+ ++it) {
+ cst = (*it);
+ if (!i->bb->dominatedBy(cst->bb))
+ break;
+ for (d = 0; d < n; ++d)
+ if (cst->getSrc(d) != i->getSrc(d + s))
+ break;
+ if (d >= n) {
+ for (d = 0; d < n; ++d, ++s)
+ i->setSrc(s, cst->getDef(d));
+ return;
+ }
+ }
+ cst = new_Instruction(func, OP_CONSTRAINT, i->dType);
+
+ for (d = 0; d < n; ++s, ++d) {
+ cst->setDef(d, new_LValue(func, FILE_GPR));
+ cst->setSrc(d, i->getSrc(s));
+ i->setSrc(s, cst->getDef(d));
+ }
+ i->bb->insertBefore(i, cst);
+
+ constrList.push_back(cst);
+}
+
+// Add a dummy use of the pointer source of >= 8 byte loads after the load
+// to prevent it from being assigned a register that overlaps the load's
+// destination, which would produce random corruptions.
+void
+RegAlloc::InsertConstraintsPass::addHazard(Instruction *i, const ValueRef *src)
+{
+ Instruction *hzd = new_Instruction(func, OP_NOP, TYPE_NONE);
+ hzd->setSrc(0, src->get());
+ i->bb->insertAfter(i, hzd);
+}
+
+// b32 { %r0 %r1 %r2 %r3 } -> b128 %r0q
+void
+RegAlloc::InsertConstraintsPass::condenseDefs(Instruction *insn)
+{
+ uint8_t size = 0;
+ int n;
+ for (n = 0; insn->defExists(n) && insn->def(n).getFile() == FILE_GPR; ++n)
+ size += insn->getDef(n)->reg.size;
+ if (n < 2)
+ return;
+ LValue *lval = new_LValue(func, FILE_GPR);
+ lval->reg.size = size;
+
+ Instruction *split = new_Instruction(func, OP_SPLIT, typeOfSize(size));
+ split->setSrc(0, lval);
+ for (int d = 0; d < n; ++d) {
+ split->setDef(d, insn->getDef(d));
+ insn->setDef(d, NULL);
+ }
+ insn->setDef(0, lval);
+
+ for (int k = 1, d = n; insn->defExists(d); ++d, ++k) {
+ insn->setDef(k, insn->getDef(d));
+ insn->setDef(d, NULL);
+ }
+ // carry over predicate if any (mainly for OP_UNION uses)
+ split->setPredicate(insn->cc, insn->getPredicate());
+
+ insn->bb->insertAfter(insn, split);
+ constrList.push_back(split);
+}
+
+void
+RegAlloc::InsertConstraintsPass::condenseSrcs(Instruction *insn,
+ const int a, const int b)
+{
+ uint8_t size = 0;
+ if (a >= b)
+ return;
+ for (int s = a; s <= b; ++s)
+ size += insn->getSrc(s)->reg.size;
+ if (!size)
+ return;
+ LValue *lval = new_LValue(func, FILE_GPR);
+ lval->reg.size = size;
+
+ Value *save[3];
+ insn->takeExtraSources(0, save);
+
+ Instruction *merge = new_Instruction(func, OP_MERGE, typeOfSize(size));
+ merge->setDef(0, lval);
+ for (int s = a, i = 0; s <= b; ++s, ++i) {
+ merge->setSrc(i, insn->getSrc(s));
+ insn->setSrc(s, NULL);
+ }
+ insn->setSrc(a, lval);
+
+ for (int k = a + 1, s = b + 1; insn->srcExists(s); ++s, ++k) {
+ insn->setSrc(k, insn->getSrc(s));
+ insn->setSrc(s, NULL);
+ }
+ insn->bb->insertBefore(insn, merge);
+
+ insn->putExtraSources(0, save);
+
+ constrList.push_back(merge);
+}
+
+void
+RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex)
+{
+ if (isTextureOp(tex->op))
+ textureMask(tex);
+ condenseDefs(tex);
+
+ if (tex->op == OP_SUSTB || tex->op == OP_SUSTP) {
+ condenseSrcs(tex, 3, (3 + typeSizeof(tex->dType) / 4) - 1);
+ } else
+ if (isTextureOp(tex->op)) {
+ int n = tex->srcCount(0xff, true);
+ if (n > 4) {
+ condenseSrcs(tex, 0, 3);
+ if (n > 5) // NOTE: first call modified positions already
+ condenseSrcs(tex, 4 - (4 - 1), n - 1 - (4 - 1));
+ } else
+ if (n > 1) {
+ condenseSrcs(tex, 0, n - 1);
+ }
+ }
+}
+
+void
+RegAlloc::InsertConstraintsPass::texConstraintNVC0(TexInstruction *tex)
+{
+ int n, s;
+
+ textureMask(tex);
+
+ if (tex->op == OP_TXQ) {
+ s = tex->srcCount(0xff);
+ n = 0;
+ } else {
+ s = tex->tex.target.getArgCount();
+ if (!tex->tex.target.isArray() &&
+ (tex->tex.rIndirectSrc >= 0 || tex->tex.sIndirectSrc >= 0))
+ ++s;
+ if (tex->op == OP_TXD && tex->tex.useOffsets)
+ ++s;
+ n = tex->srcCount(0xff) - s;
+ assert(n <= 4);
+ }
+
+ if (s > 1)
+ condenseSrcs(tex, 0, s - 1);
+ if (n > 1) // NOTE: first call modified positions already
+ condenseSrcs(tex, 1, n);
+
+ condenseDefs(tex);
+}
+
+void
+RegAlloc::InsertConstraintsPass::texConstraintNV50(TexInstruction *tex)
+{
+ Value *pred = tex->getPredicate();
+ if (pred)
+ tex->setPredicate(tex->cc, NULL);
+
+ textureMask(tex);
+
+ assert(tex->defExists(0) && tex->srcExists(0));
+ // make src and def count match
+ int c;
+ for (c = 0; tex->srcExists(c) || tex->defExists(c); ++c) {
+ if (!tex->srcExists(c))
+ tex->setSrc(c, new_LValue(func, tex->getSrc(0)->asLValue()));
+ if (!tex->defExists(c))
+ tex->setDef(c, new_LValue(func, tex->getDef(0)->asLValue()));
+ }
+ if (pred)
+ tex->setPredicate(tex->cc, pred);
+ condenseDefs(tex);
+ condenseSrcs(tex, 0, c - 1);
+}
+
+// Insert constraint markers for instructions whose multiple sources must be
+// located in consecutive registers.
+bool
+RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
+{
+ TexInstruction *tex;
+ Instruction *next;
+ int s, size;
+
+ targ = bb->getProgram()->getTarget();
+
+ for (Instruction *i = bb->getEntry(); i; i = next) {
+ next = i->next;
+
+ if ((tex = i->asTex())) {
+ switch (targ->getChipset() & ~0xf) {
+ case 0x50:
+ case 0x80:
+ case 0x90:
+ case 0xa0:
+ texConstraintNV50(tex);
+ break;
+ case 0xc0:
+ case 0xd0:
+ texConstraintNVC0(tex);
+ break;
+ case 0xe0:
+ case NVISA_GK110_CHIPSET:
+ texConstraintNVE0(tex);
+ break;
+ default:
+ break;
+ }
+ } else
+ if (i->op == OP_EXPORT || i->op == OP_STORE) {
+ for (size = typeSizeof(i->dType), s = 1; size > 0; ++s) {
+ assert(i->srcExists(s));
+ size -= i->getSrc(s)->reg.size;
+ }
+ condenseSrcs(i, 1, s - 1);
+ } else
+ if (i->op == OP_LOAD || i->op == OP_VFETCH) {
+ condenseDefs(i);
+ if (i->src(0).isIndirect(0) && typeSizeof(i->dType) >= 8)
+ addHazard(i, i->src(0).getIndirect(0));
+ } else
+ if (i->op == OP_UNION ||
+ i->op == OP_MERGE ||
+ i->op == OP_SPLIT) {
+ constrList.push_back(i);
+ }
+ }
+ return true;
+}
+
+// Insert extra moves so that, if multiple register constraints on a value are
+// in conflict, these conflicts can be resolved.
+bool
+RegAlloc::InsertConstraintsPass::insertConstraintMoves()
+{
+ for (std::list<Instruction *>::iterator it = constrList.begin();
+ it != constrList.end();
+ ++it) {
+ Instruction *cst = *it;
+ Instruction *mov;
+
+ if (cst->op == OP_SPLIT && 0) { // intentionally disabled
+ // spilling splits is annoying, just make sure they're separate
+ for (int d = 0; cst->defExists(d); ++d) {
+ if (!cst->getDef(d)->refCount())
+ continue;
+ LValue *lval = new_LValue(func, cst->def(d).getFile());
+ const uint8_t size = cst->def(d).getSize();
+ lval->reg.size = size;
+
+ mov = new_Instruction(func, OP_MOV, typeOfSize(size));
+ mov->setSrc(0, lval);
+ mov->setDef(0, cst->getDef(d));
+ cst->setDef(d, mov->getSrc(0));
+ cst->bb->insertAfter(cst, mov);
+
+ cst->getSrc(0)->asLValue()->noSpill = 1;
+ mov->getSrc(0)->asLValue()->noSpill = 1;
+ }
+ } else
+ if (cst->op == OP_MERGE || cst->op == OP_UNION) {
+ for (int s = 0; cst->srcExists(s); ++s) {
+ const uint8_t size = cst->src(s).getSize();
+
+ if (!cst->getSrc(s)->defs.size()) {
+ mov = new_Instruction(func, OP_NOP, typeOfSize(size));
+ mov->setDef(0, cst->getSrc(s));
+ cst->bb->insertBefore(cst, mov);
+ continue;
+ }
+ assert(cst->getSrc(s)->defs.size() == 1); // still SSA
+
+ Instruction *defi = cst->getSrc(s)->defs.front()->getInsn();
+ // catch some cases where we don't really need MOVs
+ if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs())
+ continue;
+
+ LValue *lval = new_LValue(func, cst->src(s).getFile());
+ lval->reg.size = size;
+
+ mov = new_Instruction(func, OP_MOV, typeOfSize(size));
+ mov->setDef(0, lval);
+ mov->setSrc(0, cst->getSrc(s));
+ cst->setSrc(s, mov->getDef(0));
+ cst->bb->insertBefore(cst, mov);
+
+ cst->getDef(0)->asLValue()->noSpill = 1; // doesn't help
+
+ if (cst->op == OP_UNION)
+ mov->setPredicate(defi->cc, defi->getPredicate());
+ }
+ }
+ }
+
+ return true;
+}
+
+} // namespace nv50_ir