author     Johannes Obermayr <johannesobermayr@gmx.de>          2013-08-20 20:14:00 +0200
committer  Christoph Bumiller <e0425955@student.tuwien.ac.at>   2013-09-11 21:47:07 +0200
commit     5eb7ff1175a644ffe3b0f1a75cb235400355f9fb (patch)
tree       613342591e12a96725df715853a5e579ba1ec8ea /src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
parent     ebcdaa7bbc3a10fe59447ae77b508ee85eaa582f (diff)
Move nv30, nv50 and nvc0 to nouveau.
It is planned to ship openSUSE 13.1 with -shared libs. nouveau.la, nv30.la,
nv50.la and nvc0.la are currently LIBADDs in all nouveau related targets.
This change makes it possible to easily build one shared libnouveau.so which
is then LIBADDed. Also dlopen will be faster for one library instead of
three, and build time on -jX will be reduced.

Whitespace fixes were requested by 'git am'.

Signed-off-by: Johannes Obermayr <johannesobermayr@gmx.de>
Acked-by: Christoph Bumiller <christoph.bumiller@speed.at>
Acked-by: Ian Romanick <ian.d.romanick@intel.com>
Diffstat (limited to 'src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp')
-rw-r--r--   src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp   2050
1 file changed, 2050 insertions(+), 0 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
new file mode 100644
index 00000000000..d65003ce4eb
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -0,0 +1,2050 @@
+/*
+ * Copyright 2011 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "codegen/nv50_ir.h"
+#include "codegen/nv50_ir_target.h"
+
+#include <stack>
+#include <limits>
+
+namespace nv50_ir {
+
+#define MAX_REGISTER_FILE_SIZE 256
+
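+// Occupancy tracker for each register file: allocation is done in units of
+// (1 << unit[f]) bytes, each unit corresponds to one bit in bits[f], and
+// fill[f] records the highest unit ever assigned.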
+class RegisterSet
+{
+public:
+ RegisterSet(const Target *);
+
+ void init(const Target *);
+ void reset(DataFile, bool resetMax = false);
+
+ void periodicMask(DataFile f, uint32_t lock, uint32_t unlock);
+ void intersect(DataFile f, const RegisterSet *);
+
+ bool assign(int32_t& reg, DataFile f, unsigned int size);
+ void release(DataFile f, int32_t reg, unsigned int size);
+ void occupy(DataFile f, int32_t reg, unsigned int size);
+ void occupy(const Value *);
+ void occupyMask(DataFile f, int32_t reg, uint8_t mask);
+ bool isOccupied(DataFile f, int32_t reg, unsigned int size) const;
+ bool testOccupy(const Value *);
+ bool testOccupy(DataFile f, int32_t reg, unsigned int size);
+
+ inline int getMaxAssigned(DataFile f) const { return fill[f]; }
+
+ inline unsigned int getFileSize(DataFile f, uint8_t regSize) const
+ {
+ if (restrictedGPR16Range && f == FILE_GPR && regSize == 2)
+ return (last[f] + 1) / 2;
+ return last[f] + 1;
+ }
+
+ inline unsigned int units(DataFile f, unsigned int size) const
+ {
+ return size >> unit[f];
+ }
+ // for regs of size >= 4, id is counted in 4-byte words (like nv50/c0 binary)
+ inline unsigned int idToBytes(const Value *v) const
+ {
+ return v->reg.data.id * MIN2(v->reg.size, 4);
+ }
+ inline unsigned int idToUnits(const Value *v) const
+ {
+ return units(v->reg.file, idToBytes(v));
+ }
+ inline int bytesToId(Value *v, unsigned int bytes) const
+ {
+ if (v->reg.size < 4)
+ return units(v->reg.file, bytes);
+ return bytes / 4;
+ }
+ inline int unitsToId(DataFile f, int u, uint8_t size) const
+ {
+ if (u < 0)
+ return -1;
+ return (size < 4) ? u : ((u << unit[f]) / 4);
+ }
+
+ void print() const;
+
+private:
+ BitSet bits[LAST_REGISTER_FILE + 1];
+
+ int unit[LAST_REGISTER_FILE + 1]; // log2 of allocation granularity
+
+ int last[LAST_REGISTER_FILE + 1];
+ int fill[LAST_REGISTER_FILE + 1];
+
+ const bool restrictedGPR16Range;
+};
+
+void
+RegisterSet::reset(DataFile f, bool resetMax)
+{
+ bits[f].fill(0);
+ if (resetMax)
+ fill[f] = -1;
+}
+
+void
+RegisterSet::init(const Target *targ)
+{
+ for (unsigned int rf = 0; rf <= FILE_ADDRESS; ++rf) {
+ DataFile f = static_cast<DataFile>(rf);
+ last[rf] = targ->getFileSize(f) - 1;
+ unit[rf] = targ->getFileUnit(f);
+ fill[rf] = -1;
+ assert(last[rf] < MAX_REGISTER_FILE_SIZE);
+ bits[rf].allocate(last[rf] + 1, true);
+ }
+}
+
+RegisterSet::RegisterSet(const Target *targ)
+ : restrictedGPR16Range(targ->getChipset() < 0xc0)
+{
+ init(targ);
+ for (unsigned int i = 0; i <= LAST_REGISTER_FILE; ++i)
+ reset(static_cast<DataFile>(i));
+}
+
+void
+RegisterSet::periodicMask(DataFile f, uint32_t lock, uint32_t unlock)
+{
+ bits[f].periodicMask32(lock, unlock);
+}
+
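+// Note: ORing the occupancy bits of both sets intersects the sets of *free*
+// registers.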
+void
+RegisterSet::intersect(DataFile f, const RegisterSet *set)
+{
+ bits[f] |= set->bits[f];
+}
+
+void
+RegisterSet::print() const
+{
+ INFO("GPR:");
+ bits[FILE_GPR].print();
+ INFO("\n");
+}
+
+bool
+RegisterSet::assign(int32_t& reg, DataFile f, unsigned int size)
+{
+ reg = bits[f].findFreeRange(size);
+ if (reg < 0)
+ return false;
+ fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1));
+ return true;
+}
+
+bool
+RegisterSet::isOccupied(DataFile f, int32_t reg, unsigned int size) const
+{
+ return bits[f].testRange(reg, size);
+}
+
+void
+RegisterSet::occupy(const Value *v)
+{
+ occupy(v->reg.file, idToUnits(v), v->reg.size >> unit[v->reg.file]);
+}
+
+void
+RegisterSet::occupyMask(DataFile f, int32_t reg, uint8_t mask)
+{
+ bits[f].setMask(reg & ~31, static_cast<uint32_t>(mask) << (reg % 32));
+}
+
+void
+RegisterSet::occupy(DataFile f, int32_t reg, unsigned int size)
+{
+ bits[f].setRange(reg, size);
+
+ INFO_DBG(0, REG_ALLOC, "reg occupy: %u[%i] %u\n", f, reg, size);
+
+ fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1));
+}
+
+bool
+RegisterSet::testOccupy(const Value *v)
+{
+ return testOccupy(v->reg.file,
+ idToUnits(v), v->reg.size >> unit[v->reg.file]);
+}
+
+bool
+RegisterSet::testOccupy(DataFile f, int32_t reg, unsigned int size)
+{
+ if (isOccupied(f, reg, size))
+ return false;
+ occupy(f, reg, size);
+ return true;
+}
+
+void
+RegisterSet::release(DataFile f, int32_t reg, unsigned int size)
+{
+ bits[f].clrRange(reg, size);
+
+ INFO_DBG(0, REG_ALLOC, "reg release: %u[%i] %u\n", f, reg, size);
+}
+
+class RegAlloc
+{
+public:
+ RegAlloc(Program *program) : prog(program), sequence(0) { }
+
+ bool exec();
+ bool execFunc();
+
+private:
+ class PhiMovesPass : public Pass {
+ private:
+ virtual bool visit(BasicBlock *);
+ inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p);
+ };
+
+ class ArgumentMovesPass : public Pass {
+ private:
+ virtual bool visit(BasicBlock *);
+ };
+
+ class BuildIntervalsPass : public Pass {
+ private:
+ virtual bool visit(BasicBlock *);
+ void collectLiveValues(BasicBlock *);
+ void addLiveRange(Value *, const BasicBlock *, int end);
+ };
+
+ class InsertConstraintsPass : public Pass {
+ public:
+ bool exec(Function *func);
+ private:
+ virtual bool visit(BasicBlock *);
+
+ bool insertConstraintMoves();
+
+ void condenseDefs(Instruction *);
+ void condenseSrcs(Instruction *, const int first, const int last);
+
+ void addHazard(Instruction *i, const ValueRef *src);
+ void textureMask(TexInstruction *);
+ void addConstraint(Instruction *, int s, int n);
+ bool detectConflict(Instruction *, int s);
+
+ // target specific functions, TODO: put in subclass or Target
+ void texConstraintNV50(TexInstruction *);
+ void texConstraintNVC0(TexInstruction *);
+ void texConstraintNVE0(TexInstruction *);
+
+ std::list<Instruction *> constrList;
+
+ const Target *targ;
+ };
+
+ bool buildLiveSets(BasicBlock *);
+
+private:
+ Program *prog;
+ Function *func;
+
+ // instructions in control flow / chronological order
+ ArrayList insns;
+
+ int sequence; // for manual passes through CFG
+};
+
+typedef std::pair<Value *, Value *> ValuePair;
+
+class SpillCodeInserter
+{
+public:
+ SpillCodeInserter(Function *fn) : func(fn), stackSize(0), stackBase(0) { }
+
+ bool run(const std::list<ValuePair>&);
+
+ Symbol *assignSlot(const Interval&, const unsigned int size);
+ inline int32_t getStackSize() const { return stackSize; }
+
+private:
+ Function *func;
+
+ struct SpillSlot
+ {
+ Interval occup;
+ std::list<Value *> residents; // needed to recalculate occup
+ Symbol *sym;
+ int32_t offset;
+ inline uint8_t size() const { return sym->reg.size; }
+ };
+ std::list<SpillSlot> slots;
+ int32_t stackSize;
+ int32_t stackBase;
+
+ LValue *unspill(Instruction *usei, LValue *, Value *slot);
+ void spill(Instruction *defi, Value *slot, LValue *);
+};
+
+void
+RegAlloc::BuildIntervalsPass::addLiveRange(Value *val,
+ const BasicBlock *bb,
+ int end)
+{
+ Instruction *insn = val->getUniqueInsn();
+
+ if (!insn)
+ insn = bb->getFirst();
+
+ assert(bb->getFirst()->serial <= bb->getExit()->serial);
+ assert(bb->getExit()->serial + 1 >= end);
+
+ int begin = insn->serial;
+ if (begin < bb->getEntry()->serial || begin > bb->getExit()->serial)
+ begin = bb->getEntry()->serial;
+
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "%%%i <- live range [%i(%i), %i)\n",
+ val->id, begin, insn->serial, end);
+
+ if (begin != end) // empty ranges are only added as hazards for fixed regs
+ val->livei.extend(begin, end);
+}
+
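+// Check whether the CFG edge p -> b is critical, i.e. p has two forward/tree
+// successors and b has more than one predecessor. Such edges are split below
+// so the phi MOVs get a block that is only executed on this path.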
+bool
+RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p)
+{
+ if (b->cfg.incidentCount() <= 1)
+ return false;
+
+ int n = 0;
+ for (Graph::EdgeIterator ei = p->cfg.outgoing(); !ei.end(); ei.next())
+ if (ei.getType() == Graph::Edge::TREE ||
+ ei.getType() == Graph::Edge::FORWARD)
+ ++n;
+ return (n == 2);
+}
+
+// For each operand of each PHI in b, generate a new value by inserting a MOV
+// at the end of the block it is coming from and replace the operand with its
+// result. This eliminates liveness conflicts and enables us to let values be
+// copied to the right register if such a conflict exists nonetheless.
+//
+// These MOVs are also crucial in making sure the live intervals of phi sources
+// are extended until the end of the loop, since they are not included in the
+// live-in sets.
+bool
+RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
+{
+ Instruction *phi, *mov;
+ BasicBlock *pb, *pn;
+
+ std::stack<BasicBlock *> stack;
+
+ for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
+ pb = BasicBlock::get(ei.getNode());
+ assert(pb);
+ if (needNewElseBlock(bb, pb))
+ stack.push(pb);
+ }
+ while (!stack.empty()) {
+ pb = stack.top();
+ pn = new BasicBlock(func);
+ stack.pop();
+
+ pb->cfg.detach(&bb->cfg);
+ pb->cfg.attach(&pn->cfg, Graph::Edge::TREE);
+ pn->cfg.attach(&bb->cfg, Graph::Edge::FORWARD);
+
+ assert(pb->getExit()->op != OP_CALL);
+ if (pb->getExit()->asFlow()->target.bb == bb)
+ pb->getExit()->asFlow()->target.bb = pn;
+ }
+
+ // insert MOVs (phi->src(j) should stem from j-th in-BB)
+ int j = 0;
+ for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
+ pb = BasicBlock::get(ei.getNode());
+ if (!pb->isTerminated())
+ pb->insertTail(new_FlowInstruction(func, OP_BRA, bb));
+
+ for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
+ mov = new_Instruction(func, OP_MOV, TYPE_U32);
+
+ mov->setSrc(0, phi->getSrc(j));
+ mov->setDef(0, new_LValue(func, phi->getDef(0)->asLValue()));
+ phi->setSrc(j, mov->getDef(0));
+
+ pb->insertBefore(pb->getExit(), mov);
+ }
+ ++j;
+ }
+
+ return true;
+}
+
+bool
+RegAlloc::ArgumentMovesPass::visit(BasicBlock *bb)
+{
+ // Bind function call inputs/outputs to the same physical register
+ // the callee uses, inserting moves as appropriate for the case a
+ // conflict arises.
+ for (Instruction *i = bb->getEntry(); i; i = i->next) {
+ FlowInstruction *cal = i->asFlow();
+ // TODO: Handle indirect calls.
+ // Right now they should only be generated for builtins.
+ if (!cal || cal->op != OP_CALL || cal->builtin || cal->indirect)
+ continue;
+ RegisterSet clobberSet(prog->getTarget());
+
+ // Bind input values.
+ for (int s = cal->indirect ? 1 : 0; cal->srcExists(s); ++s) {
+ const int t = cal->indirect ? (s - 1) : s;
+ LValue *tmp = new_LValue(func, cal->getSrc(s)->asLValue());
+ tmp->reg.data.id = cal->target.fn->ins[t].rep()->reg.data.id;
+
+ Instruction *mov =
+ new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size));
+ mov->setDef(0, tmp);
+ mov->setSrc(0, cal->getSrc(s));
+ cal->setSrc(s, tmp);
+
+ bb->insertBefore(cal, mov);
+ }
+
+ // Bind output values.
+ for (int d = 0; cal->defExists(d); ++d) {
+ LValue *tmp = new_LValue(func, cal->getDef(d)->asLValue());
+ tmp->reg.data.id = cal->target.fn->outs[d].rep()->reg.data.id;
+
+ Instruction *mov =
+ new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size));
+ mov->setSrc(0, tmp);
+ mov->setDef(0, cal->getDef(d));
+ cal->setDef(d, tmp);
+
+ bb->insertAfter(cal, mov);
+ clobberSet.occupy(tmp);
+ }
+
+ // Bind clobbered values.
+ for (std::deque<Value *>::iterator it = cal->target.fn->clobbers.begin();
+ it != cal->target.fn->clobbers.end();
+ ++it) {
+ if (clobberSet.testOccupy(*it)) {
+ Value *tmp = new_LValue(func, (*it)->asLValue());
+ tmp->reg.data.id = (*it)->reg.data.id;
+ cal->setDef(cal->defCount(), tmp);
+ }
+ }
+ }
+
+ // Update the clobber set of the function.
+ if (BasicBlock::get(func->cfgExit) == bb) {
+ func->buildDefSets();
+ for (unsigned int i = 0; i < bb->defSet.getSize(); ++i)
+ if (bb->defSet.test(i))
+ func->clobbers.push_back(func->getLValue(i));
+ }
+
+ return true;
+}
+
+// Build the set of live-in variables of bb.
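+// Computed backwards: union the live-in sets of all successors (recursing
+// into unvisited ones first), then walk the instructions bottom-up, clearing
+// defs and setting sources.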
+bool
+RegAlloc::buildLiveSets(BasicBlock *bb)
+{
+ Function *f = bb->getFunction();
+ BasicBlock *bn;
+ Instruction *i;
+ unsigned int s, d;
+
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "buildLiveSets(BB:%i)\n", bb->getId());
+
+ bb->liveSet.allocate(func->allLValues.getSize(), false);
+
+ int n = 0;
+ for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
+ bn = BasicBlock::get(ei.getNode());
+ if (bn == bb)
+ continue;
+ if (bn->cfg.visit(sequence))
+ if (!buildLiveSets(bn))
+ return false;
+ if (n++ || bb->liveSet.marker)
+ bb->liveSet |= bn->liveSet;
+ else
+ bb->liveSet = bn->liveSet;
+ }
+ if (!n && !bb->liveSet.marker)
+ bb->liveSet.fill(0);
+ bb->liveSet.marker = true;
+
+ if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
+ INFO("BB:%i live set of out blocks:\n", bb->getId());
+ bb->liveSet.print();
+ }
+
+ // if (!bb->getEntry())
+ // return true;
+
+ if (bb == BasicBlock::get(f->cfgExit)) {
+ for (std::deque<ValueRef>::iterator it = f->outs.begin();
+ it != f->outs.end(); ++it) {
+ assert(it->get()->asLValue());
+ bb->liveSet.set(it->get()->id);
+ }
+ }
+
+ for (i = bb->getExit(); i && i != bb->getEntry()->prev; i = i->prev) {
+ for (d = 0; i->defExists(d); ++d)
+ bb->liveSet.clr(i->getDef(d)->id);
+ for (s = 0; i->srcExists(s); ++s)
+ if (i->getSrc(s)->asLValue())
+ bb->liveSet.set(i->getSrc(s)->id);
+ }
+ for (i = bb->getPhi(); i && i->op == OP_PHI; i = i->next)
+ bb->liveSet.clr(i->getDef(0)->id);
+
+ if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
+ INFO("BB:%i live set after propagation:\n", bb->getId());
+ bb->liveSet.print();
+ }
+
+ return true;
+}
+
+void
+RegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock *bb)
+{
+ BasicBlock *bbA = NULL, *bbB = NULL;
+
+ if (bb->cfg.outgoingCount()) {
+ // trickery to save a loop of OR'ing liveSets
+ // aliasing works fine with BitSet::setOr
+ for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
+ if (ei.getType() == Graph::Edge::DUMMY)
+ continue;
+ if (bbA) {
+ bb->liveSet.setOr(&bbA->liveSet, &bbB->liveSet);
+ bbA = bb;
+ } else {
+ bbA = bbB;
+ }
+ bbB = BasicBlock::get(ei.getNode());
+ }
+ bb->liveSet.setOr(&bbB->liveSet, bbA ? &bbA->liveSet : NULL);
+ } else
+ if (bb->cfg.incidentCount()) {
+ bb->liveSet.fill(0);
+ }
+}
+
+bool
+RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb)
+{
+ collectLiveValues(bb);
+
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "BuildIntervals(BB:%i)\n", bb->getId());
+
+ // go through out blocks and delete phi sources that do not originate from
+ // the current block from the live set
+ for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
+ BasicBlock *out = BasicBlock::get(ei.getNode());
+
+ for (Instruction *i = out->getPhi(); i && i->op == OP_PHI; i = i->next) {
+ bb->liveSet.clr(i->getDef(0)->id);
+
+ for (int s = 0; i->srcExists(s); ++s) {
+ assert(i->src(s).getInsn());
+ if (i->getSrc(s)->getUniqueInsn()->bb == bb) // XXX: reachableBy ?
+ bb->liveSet.set(i->getSrc(s)->id);
+ else
+ bb->liveSet.clr(i->getSrc(s)->id);
+ }
+ }
+ }
+
+ // remaining live-outs are live until end
+ if (bb->getExit()) {
+ for (unsigned int j = 0; j < bb->liveSet.getSize(); ++j)
+ if (bb->liveSet.test(j))
+ addLiveRange(func->getLValue(j), bb, bb->getExit()->serial + 1);
+ }
+
+ for (Instruction *i = bb->getExit(); i && i->op != OP_PHI; i = i->prev) {
+ for (int d = 0; i->defExists(d); ++d) {
+ bb->liveSet.clr(i->getDef(d)->id);
+ if (i->getDef(d)->reg.data.id >= 0) // add hazard for fixed regs
+ i->getDef(d)->livei.extend(i->serial, i->serial);
+ }
+
+ for (int s = 0; i->srcExists(s); ++s) {
+ if (!i->getSrc(s)->asLValue())
+ continue;
+ if (!bb->liveSet.test(i->getSrc(s)->id)) {
+ bb->liveSet.set(i->getSrc(s)->id);
+ addLiveRange(i->getSrc(s), bb, i->serial);
+ }
+ }
+ }
+
+ if (bb == BasicBlock::get(func->cfg.getRoot())) {
+ for (std::deque<ValueDef>::iterator it = func->ins.begin();
+ it != func->ins.end(); ++it) {
+ if (it->get()->reg.data.id >= 0) // add hazard for fixed regs
+ it->get()->livei.extend(0, 1);
+ }
+ }
+
+ return true;
+}
+
+
+#define JOIN_MASK_PHI (1 << 0)
+#define JOIN_MASK_UNION (1 << 1)
+#define JOIN_MASK_MOV (1 << 2)
+#define JOIN_MASK_TEX (1 << 3)
+
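+// Graph-colouring register allocator: build an interference graph over the
+// live ranges, coalesce copy-related values into single nodes, then colour
+// the graph with the usual simplify/select phases, spilling where no colour
+// is available.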
+class GCRA
+{
+public:
+ GCRA(Function *, SpillCodeInserter&);
+ ~GCRA();
+
+ bool allocateRegisters(ArrayList& insns);
+
+ void printNodeInfo() const;
+
+private:
+ class RIG_Node : public Graph::Node
+ {
+ public:
+ RIG_Node();
+
+ void init(const RegisterSet&, LValue *);
+
+ void addInterference(RIG_Node *);
+ void addRegPreference(RIG_Node *);
+
+ inline LValue *getValue() const
+ {
+ return reinterpret_cast<LValue *>(data);
+ }
+ inline void setValue(LValue *lval) { data = lval; }
+
+ inline uint8_t getCompMask() const
+ {
+ return ((1 << colors) - 1) << (reg & 7);
+ }
+
+ static inline RIG_Node *get(const Graph::EdgeIterator& ei)
+ {
+ return static_cast<RIG_Node *>(ei.getNode());
+ }
+
+ public:
+ uint32_t degree;
+ uint16_t degreeLimit; // if deg < degLimit, node is trivially colourable
+ uint16_t colors;
+
+ DataFile f;
+ int32_t reg;
+
+ float weight;
+
+ // list pointers for simplify() phase
+ RIG_Node *next;
+ RIG_Node *prev;
+
+ // union of the live intervals of all coalesced values (we want to retain
+ // the separate intervals for testing interference of compound values)
+ Interval livei;
+
+ std::list<RIG_Node *> prefRegs;
+ };
+
+private:
+ inline RIG_Node *getNode(const LValue *v) const { return &nodes[v->id]; }
+
+ void buildRIG(ArrayList&);
+ bool coalesce(ArrayList&);
+ bool doCoalesce(ArrayList&, unsigned int mask);
+ void calculateSpillWeights();
+ void simplify();
+ bool selectRegisters();
+ void cleanup(const bool success);
+
+ void simplifyEdge(RIG_Node *, RIG_Node *);
+ void simplifyNode(RIG_Node *);
+
+ bool coalesceValues(Value *, Value *, bool force);
+ void resolveSplitsAndMerges();
+ void makeCompound(Instruction *, bool isSplit);
+
+ inline void checkInterference(const RIG_Node *, Graph::EdgeIterator&);
+
+ inline void insertOrderedTail(std::list<RIG_Node *>&, RIG_Node *);
+ void checkList(std::list<RIG_Node *>&);
+
+private:
+ std::stack<uint32_t> stack;
+
+ // list headers for simplify() phase
+ RIG_Node lo[2];
+ RIG_Node hi;
+
+ Graph RIG;
+ RIG_Node *nodes;
+ unsigned int nodeCount;
+
+ Function *func;
+ Program *prog;
+
+ static uint8_t relDegree[17][17];
+
+ RegisterSet regs;
+
+ // need to fixup register id for participants of OP_MERGE/SPLIT
+ std::list<Instruction *> merges;
+ std::list<Instruction *> splits;
+
+ SpillCodeInserter& spill;
+ std::list<ValuePair> mustSpill;
+};
+
+uint8_t GCRA::relDegree[17][17];
+
+GCRA::RIG_Node::RIG_Node() : Node(NULL), next(this), prev(this)
+{
+ colors = 0;
+}
+
+void
+GCRA::printNodeInfo() const
+{
+ for (unsigned int i = 0; i < nodeCount; ++i) {
+ if (!nodes[i].colors)
+ continue;
+ INFO("RIG_Node[%%%i]($[%u]%i): %u colors, weight %f, deg %u/%u\n X",
+ i,
+ nodes[i].f,nodes[i].reg,nodes[i].colors,
+ nodes[i].weight,
+ nodes[i].degree, nodes[i].degreeLimit);
+
+ for (Graph::EdgeIterator ei = nodes[i].outgoing(); !ei.end(); ei.next())
+ INFO(" %%%i", RIG_Node::get(ei)->getValue()->id);
+ for (Graph::EdgeIterator ei = nodes[i].incident(); !ei.end(); ei.next())
+ INFO(" %%%i", RIG_Node::get(ei)->getValue()->id);
+ INFO("\n");
+ }
+}
+
+void
+GCRA::RIG_Node::init(const RegisterSet& regs, LValue *lval)
+{
+ setValue(lval);
+ if (lval->reg.data.id >= 0)
+ lval->noSpill = lval->fixedReg = 1;
+
+ colors = regs.units(lval->reg.file, lval->reg.size);
+ f = lval->reg.file;
+ reg = -1;
+ if (lval->reg.data.id >= 0)
+ reg = regs.idToUnits(lval);
+
+ weight = std::numeric_limits<float>::infinity();
+ degree = 0;
+ degreeLimit = regs.getFileSize(f, lval->reg.size);
+
+ livei.insert(lval->livei);
+}
+
+bool
+GCRA::coalesceValues(Value *dst, Value *src, bool force)
+{
+ LValue *rep = dst->join->asLValue();
+ LValue *val = src->join->asLValue();
+
+ if (!force && val->reg.data.id >= 0) {
+ rep = src->join->asLValue();
+ val = dst->join->asLValue();
+ }
+ RIG_Node *nRep = &nodes[rep->id];
+ RIG_Node *nVal = &nodes[val->id];
+
+ if (src->reg.file != dst->reg.file) {
+ if (!force)
+ return false;
+ WARN("forced coalescing of values in different files !\n");
+ }
+ if (!force && dst->reg.size != src->reg.size)
+ return false;
+
+ if ((rep->reg.data.id >= 0) && (rep->reg.data.id != val->reg.data.id)) {
+ if (force) {
+ if (val->reg.data.id >= 0)
+ WARN("forced coalescing of values in different fixed regs !\n");
+ } else {
+ if (val->reg.data.id >= 0)
+ return false;
+ // make sure that there is no overlap with the fixed register of rep
+ for (ArrayList::Iterator it = func->allLValues.iterator();
+ !it.end(); it.next()) {
+ Value *reg = reinterpret_cast<Value *>(it.get())->asLValue();
+ assert(reg);
+ if (reg->interfers(rep) && reg->livei.overlaps(nVal->livei))
+ return false;
+ }
+ }
+ }
+
+ if (!force && nRep->livei.overlaps(nVal->livei))
+ return false;
+
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "joining %%%i($%i) <- %%%i\n",
+ rep->id, rep->reg.data.id, val->id);
+
+ // set join pointer of all values joined with val
+ for (Value::DefIterator def = val->defs.begin(); def != val->defs.end();
+ ++def)
+ (*def)->get()->join = rep;
+ assert(rep->join == rep && val->join == rep);
+
+ // add val's definitions to rep and extend the live interval of its RIG node
+ rep->defs.insert(rep->defs.end(), val->defs.begin(), val->defs.end());
+ nRep->livei.unify(nVal->livei);
+ return true;
+}
+
+bool
+GCRA::coalesce(ArrayList& insns)
+{
+ bool ret = doCoalesce(insns, JOIN_MASK_PHI);
+ if (!ret)
+ return false;
+ switch (func->getProgram()->getTarget()->getChipset() & ~0xf) {
+ case 0x50:
+ case 0x80:
+ case 0x90:
+ case 0xa0:
+ ret = doCoalesce(insns, JOIN_MASK_UNION | JOIN_MASK_TEX);
+ break;
+ case 0xc0:
+ case 0xd0:
+ case 0xe0:
+ ret = doCoalesce(insns, JOIN_MASK_UNION);
+ break;
+ default:
+ break;
+ }
+ if (!ret)
+ return false;
+ return doCoalesce(insns, JOIN_MASK_MOV);
+}
+
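+// Build the 8-bit component mask of a member value occupying 'size' units at
+// offset 'base' within a compound of 'compSize' units. For small compounds
+// the pattern is replicated across the byte so it can be tested at any
+// aligned register position.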
+static inline uint8_t makeCompMask(int compSize, int base, int size)
+{
+ uint8_t m = ((1 << size) - 1) << base;
+
+ switch (compSize) {
+ case 1:
+ return 0xff;
+ case 2:
+ m |= (m << 2);
+ return (m << 4) | m;
+ case 3:
+ case 4:
+ return (m << 4) | m;
+ default:
+ assert(compSize <= 8);
+ return m;
+ }
+}
+
+// Used when coalescing moves. The non-compound value will become one, e.g.:
+// mov b32 $r0 $r2 / merge b64 $r0d { $r0 $r1 }
+// split b64 { $r0 $r1 } $r0d / mov b64 $r0d f64 $r2d
+static inline void copyCompound(Value *dst, Value *src)
+{
+ LValue *ldst = dst->asLValue();
+ LValue *lsrc = src->asLValue();
+
+ if (ldst->compound && !lsrc->compound) {
+ LValue *swap = lsrc;
+ lsrc = ldst;
+ ldst = swap;
+ }
+
+ ldst->compound = lsrc->compound;
+ ldst->compMask = lsrc->compMask;
+}
+
+void
+GCRA::makeCompound(Instruction *insn, bool split)
+{
+ LValue *rep = (split ? insn->getSrc(0) : insn->getDef(0))->asLValue();
+
+ if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
+ INFO("makeCompound(split = %i): ", split);
+ insn->print();
+ }
+
+ const unsigned int size = getNode(rep)->colors;
+ unsigned int base = 0;
+
+ if (!rep->compound)
+ rep->compMask = 0xff;
+ rep->compound = 1;
+
+ for (int c = 0; split ? insn->defExists(c) : insn->srcExists(c); ++c) {
+ LValue *val = (split ? insn->getDef(c) : insn->getSrc(c))->asLValue();
+
+ val->compound = 1;
+ if (!val->compMask)
+ val->compMask = 0xff;
+ val->compMask &= makeCompMask(size, base, getNode(val)->colors);
+ assert(val->compMask);
+
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "compound: %%%i:%02x <- %%%i:%02x\n",
+ rep->id, rep->compMask, val->id, val->compMask);
+
+ base += getNode(val)->colors;
+ }
+ assert(base == size);
+}
+
+bool
+GCRA::doCoalesce(ArrayList& insns, unsigned int mask)
+{
+ int c, n;
+
+ for (n = 0; n < insns.getSize(); ++n) {
+ Instruction *i;
+ Instruction *insn = reinterpret_cast<Instruction *>(insns.get(n));
+
+ switch (insn->op) {
+ case OP_PHI:
+ if (!(mask & JOIN_MASK_PHI))
+ break;
+ for (c = 0; insn->srcExists(c); ++c)
+ if (!coalesceValues(insn->getDef(0), insn->getSrc(c), false)) {
+ // this is bad
+ ERROR("failed to coalesce phi operands\n");
+ return false;
+ }
+ break;
+ case OP_UNION:
+ case OP_MERGE:
+ if (!(mask & JOIN_MASK_UNION))
+ break;
+ for (c = 0; insn->srcExists(c); ++c)
+ coalesceValues(insn->getDef(0), insn->getSrc(c), true);
+ if (insn->op == OP_MERGE) {
+ merges.push_back(insn);
+ if (insn->srcExists(1))
+ makeCompound(insn, false);
+ }
+ break;
+ case OP_SPLIT:
+ if (!(mask & JOIN_MASK_UNION))
+ break;
+ splits.push_back(insn);
+ for (c = 0; insn->defExists(c); ++c)
+ coalesceValues(insn->getSrc(0), insn->getDef(c), true);
+ makeCompound(insn, true);
+ break;
+ case OP_MOV:
+ if (!(mask & JOIN_MASK_MOV))
+ break;
+ i = NULL;
+ if (!insn->getDef(0)->uses.empty())
+ i = insn->getDef(0)->uses.front()->getInsn();
+ // if this is a constraint-move there will only be a single use
+ if (i && i->op == OP_MERGE) // do we really still need this ?
+ break;
+ i = insn->getSrc(0)->getUniqueInsn();
+ if (i && !i->constrainedDefs()) {
+ if (coalesceValues(insn->getDef(0), insn->getSrc(0), false))
+ copyCompound(insn->getSrc(0), insn->getDef(0));
+ }
+ break;
+ case OP_TEX:
+ case OP_TXB:
+ case OP_TXL:
+ case OP_TXF:
+ case OP_TXQ:
+ case OP_TXD:
+ case OP_TXG:
+ case OP_TEXCSAA:
+ if (!(mask & JOIN_MASK_TEX))
+ break;
+ for (c = 0; insn->srcExists(c) && c != insn->predSrc; ++c)
+ coalesceValues(insn->getDef(c), insn->getSrc(c), true);
+ break;
+ default:
+ break;
+ }
+ }
+ return true;
+}
+
+void
+GCRA::RIG_Node::addInterference(RIG_Node *node)
+{
+ this->degree += relDegree[node->colors][colors];
+ node->degree += relDegree[colors][node->colors];
+
+ this->attach(node, Graph::Edge::CROSS);
+}
+
+void
+GCRA::RIG_Node::addRegPreference(RIG_Node *node)
+{
+ prefRegs.push_back(node);
+}
+
+GCRA::GCRA(Function *fn, SpillCodeInserter& spill) :
+ nodes(NULL),
+ nodeCount(0),
+ func(fn),
+ regs(fn->getProgram()->getTarget()),
+ spill(spill)
+{
+ prog = func->getProgram();
+
+ // initialize relative degrees array - i takes away from j
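+ // relDegree[i][j] = j * ceil(i / j): an i-unit node can block that many
+ // colours of a j-unit neighbour (e.g. relDegree[3][2] == 4).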
+ for (int i = 1; i <= 16; ++i)
+ for (int j = 1; j <= 16; ++j)
+ relDegree[i][j] = j * ((i + j - 1) / j);
+}
+
+GCRA::~GCRA()
+{
+ if (nodes)
+ delete[] nodes;
+}
+
+void
+GCRA::checkList(std::list<RIG_Node *>& lst)
+{
+ GCRA::RIG_Node *prev = NULL;
+
+ for (std::list<RIG_Node *>::iterator it = lst.begin();
+ it != lst.end();
+ ++it) {
+ assert((*it)->getValue()->join == (*it)->getValue());
+ if (prev)
+ assert(prev->livei.begin() <= (*it)->livei.begin());
+ prev = *it;
+ }
+}
+
+void
+GCRA::insertOrderedTail(std::list<RIG_Node *>& list, RIG_Node *node)
+{
+ if (node->livei.isEmpty())
+ return;
+ // only the intervals of joined values don't necessarily arrive in order
+ std::list<RIG_Node *>::iterator prev, it;
+ for (it = list.end(); it != list.begin(); it = prev) {
+ prev = it;
+ --prev;
+ if ((*prev)->livei.begin() <= node->livei.begin())
+ break;
+ }
+ list.insert(it, node);
+}
+
+void
+GCRA::buildRIG(ArrayList& insns)
+{
+ std::list<RIG_Node *> values, active;
+
+ for (std::deque<ValueDef>::iterator it = func->ins.begin();
+ it != func->ins.end(); ++it)
+ insertOrderedTail(values, getNode(it->get()->asLValue()));
+
+ for (int i = 0; i < insns.getSize(); ++i) {
+ Instruction *insn = reinterpret_cast<Instruction *>(insns.get(i));
+ for (int d = 0; insn->defExists(d); ++d)
+ if (insn->getDef(d)->rep() == insn->getDef(d))
+ insertOrderedTail(values, getNode(insn->getDef(d)->asLValue()));
+ }
+ checkList(values);
+
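+ // Sweep the nodes in order of increasing live interval start; 'active'
+ // holds the intervals that may still overlap the current one, and every
+ // actual overlap within the same file becomes an interference edge.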
+ while (!values.empty()) {
+ RIG_Node *cur = values.front();
+
+ for (std::list<RIG_Node *>::iterator it = active.begin();
+ it != active.end();) {
+ RIG_Node *node = *it;
+
+ if (node->livei.end() <= cur->livei.begin()) {
+ it = active.erase(it);
+ } else {
+ if (node->f == cur->f && node->livei.overlaps(cur->livei))
+ cur->addInterference(node);
+ ++it;
+ }
+ }
+ values.pop_front();
+ active.push_back(cur);
+ }
+}
+
+void
+GCRA::calculateSpillWeights()
+{
+ for (unsigned int i = 0; i < nodeCount; ++i) {
+ RIG_Node *const n = &nodes[i];
+ if (!nodes[i].colors || nodes[i].livei.isEmpty())
+ continue;
+ if (nodes[i].reg >= 0) {
+ // update max reg
+ regs.occupy(n->f, n->reg, n->colors);
+ continue;
+ }
+ LValue *val = nodes[i].getValue();
+
+ if (!val->noSpill) {
+ int rc = 0;
+ for (Value::DefIterator it = val->defs.begin();
+ it != val->defs.end();
+ ++it)
+ rc += (*it)->get()->refCount();
+
+ nodes[i].weight =
+ (float)rc * (float)rc / (float)nodes[i].livei.extent();
+ }
+
+ if (nodes[i].degree < nodes[i].degreeLimit) {
+ int l = 0;
+ if (val->reg.size > 4)
+ l = 1;
+ DLLIST_ADDHEAD(&lo[l], &nodes[i]);
+ } else {
+ DLLIST_ADDHEAD(&hi, &nodes[i]);
+ }
+ }
+ if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
+ printNodeInfo();
+}
+
+void
+GCRA::simplifyEdge(RIG_Node *a, RIG_Node *b)
+{
+ bool move = b->degree >= b->degreeLimit;
+
+ INFO_DBG(prog->dbgFlags, REG_ALLOC,
+ "edge: (%%%i, deg %u/%u) >-< (%%%i, deg %u/%u)\n",
+ a->getValue()->id, a->degree, a->degreeLimit,
+ b->getValue()->id, b->degree, b->degreeLimit);
+
+ b->degree -= relDegree[a->colors][b->colors];
+
+ move = move && b->degree < b->degreeLimit;
+ if (move && !DLLIST_EMPTY(b)) {
+ int l = (b->getValue()->reg.size > 4) ? 1 : 0;
+ DLLIST_DEL(b);
+ DLLIST_ADDTAIL(&lo[l], b);
+ }
+}
+
+void
+GCRA::simplifyNode(RIG_Node *node)
+{
+ for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next())
+ simplifyEdge(node, RIG_Node::get(ei));
+
+ for (Graph::EdgeIterator ei = node->incident(); !ei.end(); ei.next())
+ simplifyEdge(node, RIG_Node::get(ei));
+
+ DLLIST_DEL(node);
+ stack.push(node->getValue()->id);
+
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "SIMPLIFY: pushed %%%i%s\n",
+ node->getValue()->id,
+ (node->degree < node->degreeLimit) ? "" : "(spill)");
+}
+
+void
+GCRA::simplify()
+{
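+ // Remove trivially colourable nodes first (small values eagerly, wide ones
+ // one at a time); when only constrained nodes remain, push the one with the
+ // lowest weight/degree ratio as the spill candidate (optimistic colouring).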
+ for (;;) {
+ if (!DLLIST_EMPTY(&lo[0])) {
+ do {
+ simplifyNode(lo[0].next);
+ } while (!DLLIST_EMPTY(&lo[0]));
+ } else
+ if (!DLLIST_EMPTY(&lo[1])) {
+ simplifyNode(lo[1].next);
+ } else
+ if (!DLLIST_EMPTY(&hi)) {
+ RIG_Node *best = hi.next;
+ float bestScore = best->weight / (float)best->degree;
+ // spill candidate
+ for (RIG_Node *it = best->next; it != &hi; it = it->next) {
+ float score = it->weight / (float)it->degree;
+ if (score < bestScore) {
+ best = it;
+ bestScore = score;
+ }
+ }
+ if (isinf(bestScore)) {
+ ERROR("no viable spill candidates left\n");
+ break;
+ }
+ simplifyNode(best);
+ } else {
+ break;
+ }
+ }
+}
+
+void
+GCRA::checkInterference(const RIG_Node *node, Graph::EdgeIterator& ei)
+{
+ const RIG_Node *intf = RIG_Node::get(ei);
+
+ if (intf->reg < 0)
+ return;
+ const LValue *vA = node->getValue();
+ const LValue *vB = intf->getValue();
+
+ const uint8_t intfMask = ((1 << intf->colors) - 1) << (intf->reg & 7);
+
+ if (vA->compound | vB->compound) {
+ // NOTE: this only works for >aligned< register tuples !
+ for (Value::DefCIterator D = vA->defs.begin(); D != vA->defs.end(); ++D) {
+ for (Value::DefCIterator d = vB->defs.begin(); d != vB->defs.end(); ++d) {
+ const LValue *vD = (*D)->get()->asLValue();
+ const LValue *vd = (*d)->get()->asLValue();
+
+ if (!vD->livei.overlaps(vd->livei)) {
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "(%%%i) X (%%%i): no overlap\n",
+ vD->id, vd->id);
+ continue;
+ }
+
+ uint8_t mask = vD->compound ? vD->compMask : ~0;
+ if (vd->compound) {
+ assert(vB->compound);
+ mask &= vd->compMask & vB->compMask;
+ } else {
+ mask &= intfMask;
+ }
+
+ INFO_DBG(prog->dbgFlags, REG_ALLOC,
+ "(%%%i)%02x X (%%%i)%02x & %02x: $r%i.%02x\n",
+ vD->id,
+ vD->compound ? vD->compMask : 0xff,
+ vd->id,
+ vd->compound ? vd->compMask : intfMask,
+ vB->compMask, intf->reg & ~7, mask);
+ if (mask)
+ regs.occupyMask(node->f, intf->reg & ~7, mask);
+ }
+ }
+ } else {
+ INFO_DBG(prog->dbgFlags, REG_ALLOC,
+ "(%%%i) X (%%%i): $r%i + %u\n",
+ vA->id, vB->id, intf->reg, intf->colors);
+ regs.occupy(node->f, intf->reg, intf->colors);
+ }
+}
+
+bool
+GCRA::selectRegisters()
+{
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "\nSELECT phase\n");
+
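+ // Pop nodes in reverse simplification order; mark the registers of all
+ // already-coloured neighbours as occupied, try a preferred register first,
+ // then fall back to a first-fit assignment.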
+ while (!stack.empty()) {
+ RIG_Node *node = &nodes[stack.top()];
+ stack.pop();
+
+ regs.reset(node->f);
+
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "\nNODE[%%%i, %u colors]\n",
+ node->getValue()->id, node->colors);
+
+ for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next())
+ checkInterference(node, ei);
+ for (Graph::EdgeIterator ei = node->incident(); !ei.end(); ei.next())
+ checkInterference(node, ei);
+
+ if (!node->prefRegs.empty()) {
+ for (std::list<RIG_Node *>::const_iterator it = node->prefRegs.begin();
+ it != node->prefRegs.end();
+ ++it) {
+ if ((*it)->reg >= 0 &&
+ regs.testOccupy(node->f, (*it)->reg, node->colors)) {
+ node->reg = (*it)->reg;
+ break;
+ }
+ }
+ }
+ if (node->reg >= 0)
+ continue;
+ LValue *lval = node->getValue();
+ if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
+ regs.print();
+ bool ret = regs.assign(node->reg, node->f, node->colors);
+ if (ret) {
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "assigned reg %i\n", node->reg);
+ lval->compMask = node->getCompMask();
+ } else {
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "must spill: %%%i (size %u)\n",
+ lval->id, lval->reg.size);
+ Symbol *slot = NULL;
+ if (lval->reg.file == FILE_GPR)
+ slot = spill.assignSlot(node->livei, lval->reg.size);
+ mustSpill.push_back(ValuePair(lval, slot));
+ }
+ }
+ if (!mustSpill.empty())
+ return false;
+ for (unsigned int i = 0; i < nodeCount; ++i) {
+ LValue *lval = nodes[i].getValue();
+ if (nodes[i].reg >= 0 && nodes[i].colors > 0)
+ lval->reg.data.id =
+ regs.unitsToId(nodes[i].f, nodes[i].reg, lval->reg.size);
+ }
+ return true;
+}
+
+bool
+GCRA::allocateRegisters(ArrayList& insns)
+{
+ bool ret;
+
+ INFO_DBG(prog->dbgFlags, REG_ALLOC,
+ "allocateRegisters to %u instructions\n", insns.getSize());
+
+ nodeCount = func->allLValues.getSize();
+ nodes = new RIG_Node[nodeCount];
+ if (!nodes)
+ return false;
+ for (unsigned int i = 0; i < nodeCount; ++i) {
+ LValue *lval = reinterpret_cast<LValue *>(func->allLValues.get(i));
+ if (lval) {
+ nodes[i].init(regs, lval);
+ RIG.insert(&nodes[i]);
+ }
+ }
+
+ // coalesce first, we use only 1 RIG node for a group of joined values
+ ret = coalesce(insns);
+ if (!ret)
+ goto out;
+
+ if (func->getProgram()->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
+ func->printLiveIntervals();
+
+ buildRIG(insns);
+ calculateSpillWeights();
+ simplify();
+
+ ret = selectRegisters();
+ if (!ret) {
+ INFO_DBG(prog->dbgFlags, REG_ALLOC,
+ "selectRegisters failed, inserting spill code ...\n");
+ regs.reset(FILE_GPR, true);
+ spill.run(mustSpill);
+ if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
+ func->print();
+ } else {
+ prog->maxGPR = std::max(prog->maxGPR, regs.getMaxAssigned(FILE_GPR));
+ }
+
+out:
+ cleanup(ret);
+ return ret;
+}
+
+void
+GCRA::cleanup(const bool success)
+{
+ mustSpill.clear();
+
+ for (ArrayList::Iterator it = func->allLValues.iterator();
+ !it.end(); it.next()) {
+ LValue *lval = reinterpret_cast<LValue *>(it.get());
+
+ lval->livei.clear();
+
+ lval->compound = 0;
+ lval->compMask = 0;
+
+ if (lval->join == lval)
+ continue;
+
+ if (success) {
+ lval->reg.data.id = lval->join->reg.data.id;
+ } else {
+ for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end();
+ ++d)
+ lval->join->defs.remove(*d);
+ lval->join = lval;
+ }
+ }
+
+ if (success)
+ resolveSplitsAndMerges();
+ splits.clear(); // avoid duplicate entries on next coalesce pass
+ merges.clear();
+
+ delete[] nodes;
+ nodes = NULL;
+}
+
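+// Find a stack slot for a spilled value: reuse an existing, suitably aligned
+// slot whose occupancy intervals do not overlap livei, otherwise grow the
+// stack by a new slot.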
+Symbol *
+SpillCodeInserter::assignSlot(const Interval &livei, const unsigned int size)
+{
+ SpillSlot slot;
+ int32_t offsetBase = stackSize;
+ int32_t offset;
+ std::list<SpillSlot>::iterator pos = slots.end(), it = slots.begin();
+
+ if (offsetBase % size)
+ offsetBase += size - (offsetBase % size);
+
+ slot.sym = NULL;
+
+ for (offset = offsetBase; offset < stackSize; offset += size) {
+ const int32_t entryEnd = offset + size;
+ while (it != slots.end() && it->offset < offset)
+ ++it;
+ if (it == slots.end()) // no slots left
+ break;
+ std::list<SpillSlot>::iterator bgn = it;
+
+ while (it != slots.end() && it->offset < entryEnd) {
+ it->occup.print();
+ if (it->occup.overlaps(livei))
+ break;
+ ++it;
+ }
+ if (it == slots.end() || it->offset >= entryEnd) {
+ // fits
+ for (; bgn != slots.end() && bgn->offset < entryEnd; ++bgn) {
+ bgn->occup.insert(livei);
+ if (bgn->size() == size)
+ slot.sym = bgn->sym;
+ }
+ break;
+ }
+ }
+ if (!slot.sym) {
+ stackSize = offset + size;
+ slot.offset = offset;
+ slot.sym = new_Symbol(func->getProgram(), FILE_MEMORY_LOCAL);
+ if (!func->stackPtr)
+ offset += func->tlsBase;
+ slot.sym->setAddress(NULL, offset);
+ slot.sym->reg.size = size;
+ slots.insert(pos, slot)->occup.insert(livei);
+ }
+ return slot.sym;
+}
+
+void
+SpillCodeInserter::spill(Instruction *defi, Value *slot, LValue *lval)
+{
+ const DataType ty = typeOfSize(slot->reg.size);
+
+ Instruction *st;
+ if (slot->reg.file == FILE_MEMORY_LOCAL) {
+ st = new_Instruction(func, OP_STORE, ty);
+ st->setSrc(0, slot);
+ st->setSrc(1, lval);
+ lval->noSpill = 1;
+ } else {
+ st = new_Instruction(func, OP_CVT, ty);
+ st->setDef(0, slot);
+ st->setSrc(0, lval);
+ }
+ defi->bb->insertAfter(defi, st);
+}
+
+LValue *
+SpillCodeInserter::unspill(Instruction *usei, LValue *lval, Value *slot)
+{
+ const DataType ty = typeOfSize(slot->reg.size);
+
+ lval = cloneShallow(func, lval);
+
+ Instruction *ld;
+ if (slot->reg.file == FILE_MEMORY_LOCAL) {
+ lval->noSpill = 1;
+ ld = new_Instruction(func, OP_LOAD, ty);
+ } else {
+ ld = new_Instruction(func, OP_CVT, ty);
+ }
+ ld->setDef(0, lval);
+ ld->setSrc(0, slot);
+
+ usei->bb->insertBefore(usei, ld);
+ return lval;
+}
+
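+// Rewrite every def/use of the listed values to go through their spill slot:
+// a store after each def, a load into a fresh temporary before each use.
+// Phi defs are deleted (memory slots) or redirected to the slot.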
+bool
+SpillCodeInserter::run(const std::list<ValuePair>& lst)
+{
+ for (std::list<ValuePair>::const_iterator it = lst.begin(); it != lst.end();
+ ++it) {
+ LValue *lval = it->first->asLValue();
+ Symbol *mem = it->second ? it->second->asSym() : NULL;
+
+ for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end();
+ ++d) {
+ Value *slot = mem ?
+ static_cast<Value *>(mem) : new_LValue(func, FILE_GPR);
+ Value *tmp = NULL;
+ Instruction *last = NULL;
+
+ LValue *dval = (*d)->get()->asLValue();
+ Instruction *defi = (*d)->getInsn();
+
+ // handle uses first or they'll contain the spill stores
+ while (!dval->uses.empty()) {
+ ValueRef *u = dval->uses.front();
+ Instruction *usei = u->getInsn();
+ assert(usei);
+ if (usei->op == OP_PHI) {
+ tmp = (slot->reg.file == FILE_MEMORY_LOCAL) ? NULL : slot;
+ last = NULL;
+ } else
+ if (!last || usei != last->next) { // TODO: sort uses
+ tmp = unspill(usei, dval, slot);
+ last = usei;
+ }
+ u->set(tmp);
+ }
+
+ assert(defi);
+ if (defi->op == OP_PHI) {
+ d = lval->defs.erase(d);
+ --d;
+ if (slot->reg.file == FILE_MEMORY_LOCAL)
+ delete_Instruction(func->getProgram(), defi);
+ else
+ defi->setDef(0, slot);
+ } else {
+ spill(defi, slot, dval);
+ }
+ }
+
+ }
+
+ // TODO: We're not trying to reuse old slots in a potential next iteration.
+ // We have to update the slots' livei intervals to be able to do that.
+ stackBase = stackSize;
+ slots.clear();
+ return true;
+}
+
+bool
+RegAlloc::exec()
+{
+ for (IteratorRef it = prog->calls.iteratorDFS(false);
+ !it->end(); it->next()) {
+ func = Function::get(reinterpret_cast<Graph::Node *>(it->get()));
+
+ func->tlsBase = prog->tlsSize;
+ if (!execFunc())
+ return false;
+ prog->tlsSize += func->tlsSize;
+ }
+ return true;
+}
+
+bool
+RegAlloc::execFunc()
+{
+ InsertConstraintsPass insertConstr;
+ PhiMovesPass insertPhiMoves;
+ ArgumentMovesPass insertArgMoves;
+ BuildIntervalsPass buildIntervals;
+ SpillCodeInserter insertSpills(func);
+
+ GCRA gcra(func, insertSpills);
+
+ unsigned int i, retries;
+ bool ret;
+
+ if (!func->ins.empty()) {
+ // Insert a nop at the entry so inputs only used by the first instruction
+ // don't count as having an empty live range.
+ Instruction *nop = new_Instruction(func, OP_NOP, TYPE_NONE);
+ BasicBlock::get(func->cfg.getRoot())->insertHead(nop);
+ }
+
+ ret = insertConstr.exec(func);
+ if (!ret)
+ goto out;
+
+ ret = insertPhiMoves.run(func);
+ if (!ret)
+ goto out;
+
+ ret = insertArgMoves.run(func);
+ if (!ret)
+ goto out;
+
+ // TODO: need to fix up spill slot usage ranges to support > 1 retry
+ for (retries = 0; retries < 3; ++retries) {
+ if (retries && (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC))
+ INFO("Retry: %i\n", retries);
+ if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
+ func->print();
+
+ // spilling to registers may add live ranges, need to rebuild everything
+ ret = true;
+ for (sequence = func->cfg.nextSequence(), i = 0;
+ ret && i <= func->loopNestingBound;
+ sequence = func->cfg.nextSequence(), ++i)
+ ret = buildLiveSets(BasicBlock::get(func->cfg.getRoot()));
+ if (!ret)
+ break;
+ func->orderInstructions(this->insns);
+
+ ret = buildIntervals.run(func);
+ if (!ret)
+ break;
+ ret = gcra.allocateRegisters(insns);
+ if (ret)
+ break; // success
+ }
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "RegAlloc done: %i\n", ret);
+
+ func->tlsSize = insertSpills.getStackSize();
+out:
+ return ret;
+}
+
+// TODO: check if modifying Value::join here breaks anything
+void
+GCRA::resolveSplitsAndMerges()
+{
+ for (std::list<Instruction *>::iterator it = splits.begin();
+ it != splits.end();
+ ++it) {
+ Instruction *split = *it;
+ unsigned int reg = regs.idToBytes(split->getSrc(0));
+ for (int d = 0; split->defExists(d); ++d) {
+ Value *v = split->getDef(d);
+ v->reg.data.id = regs.bytesToId(v, reg);
+ v->join = v;
+ reg += v->reg.size;
+ }
+ }
+ splits.clear();
+
+ for (std::list<Instruction *>::iterator it = merges.begin();
+ it != merges.end();
+ ++it) {
+ Instruction *merge = *it;
+ unsigned int reg = regs.idToBytes(merge->getDef(0));
+ for (int s = 0; merge->srcExists(s); ++s) {
+ Value *v = merge->getSrc(s);
+ v->reg.data.id = regs.bytesToId(v, reg);
+ v->join = v;
+ reg += v->reg.size;
+ }
+ }
+ merges.clear();
+}
+
+bool Program::registerAllocation()
+{
+ RegAlloc ra(this);
+ return ra.exec();
+}
+
+bool
+RegAlloc::InsertConstraintsPass::exec(Function *ir)
+{
+ constrList.clear();
+
+ bool ret = run(ir, true, true);
+ if (ret)
+ ret = insertConstraintMoves();
+ return ret;
+}
+
+// TODO: make part of texture insn
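+// Drop components whose values are never used from the TEX write mask and
+// compact the remaining defs to the front.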
+void
+RegAlloc::InsertConstraintsPass::textureMask(TexInstruction *tex)
+{
+ Value *def[4];
+ int c, k, d;
+ uint8_t mask = 0;
+
+ for (d = 0, k = 0, c = 0; c < 4; ++c) {
+ if (!(tex->tex.mask & (1 << c)))
+ continue;
+ if (tex->getDef(k)->refCount()) {
+ mask |= 1 << c;
+ def[d++] = tex->getDef(k);
+ }
+ ++k;
+ }
+ tex->tex.mask = mask;
+
+ for (c = 0; c < d; ++c)
+ tex->setDef(c, def[c]);
+ for (; c < 4; ++c)
+ tex->setDef(c, NULL);
+}
+
+bool
+RegAlloc::InsertConstraintsPass::detectConflict(Instruction *cst, int s)
+{
+ Value *v = cst->getSrc(s);
+
+ // current register allocation can't handle it if a value participates in
+ // multiple constraints
+ for (Value::UseIterator it = v->uses.begin(); it != v->uses.end(); ++it) {
+ if (cst != (*it)->getInsn())
+ return true;
+ }
+
+ // can start at s + 1 because detectConflict is called on all sources
+ for (int c = s + 1; cst->srcExists(c); ++c)
+ if (v == cst->getSrc(c))
+ return true;
+
+ Instruction *defi = v->getInsn();
+
+ return (!defi || defi->constrainedDefs());
+}
+
+void
+RegAlloc::InsertConstraintsPass::addConstraint(Instruction *i, int s, int n)
+{
+ Instruction *cst;
+ int d;
+
+ // first, look for an existing identical constraint op
+ for (std::list<Instruction *>::iterator it = constrList.begin();
+ it != constrList.end();
+ ++it) {
+ cst = (*it);
+ if (!i->bb->dominatedBy(cst->bb))
+ break;
+ for (d = 0; d < n; ++d)
+ if (cst->getSrc(d) != i->getSrc(d + s))
+ break;
+ if (d >= n) {
+ for (d = 0; d < n; ++d, ++s)
+ i->setSrc(s, cst->getDef(d));
+ return;
+ }
+ }
+ cst = new_Instruction(func, OP_CONSTRAINT, i->dType);
+
+ for (d = 0; d < n; ++s, ++d) {
+ cst->setDef(d, new_LValue(func, FILE_GPR));
+ cst->setSrc(d, i->getSrc(s));
+ i->setSrc(s, cst->getDef(d));
+ }
+ i->bb->insertBefore(i, cst);
+
+ constrList.push_back(cst);
+}
+
+// Add a dummy use of the pointer source of >= 8 byte loads after the load
+// to prevent it from being assigned a register overlapping the load's
+// destination, which would produce random corruptions.
+void
+RegAlloc::InsertConstraintsPass::addHazard(Instruction *i, const ValueRef *src)
+{
+ Instruction *hzd = new_Instruction(func, OP_NOP, TYPE_NONE);
+ hzd->setSrc(0, src->get());
+ i->bb->insertAfter(i, hzd);
+}
+
+// b32 { %r0 %r1 %r2 %r3 } -> b128 %r0q
+void
+RegAlloc::InsertConstraintsPass::condenseDefs(Instruction *insn)
+{
+ uint8_t size = 0;
+ int n;
+ for (n = 0; insn->defExists(n) && insn->def(n).getFile() == FILE_GPR; ++n)
+ size += insn->getDef(n)->reg.size;
+ if (n < 2)
+ return;
+ LValue *lval = new_LValue(func, FILE_GPR);
+ lval->reg.size = size;
+
+ Instruction *split = new_Instruction(func, OP_SPLIT, typeOfSize(size));
+ split->setSrc(0, lval);
+ for (int d = 0; d < n; ++d) {
+ split->setDef(d, insn->getDef(d));
+ insn->setDef(d, NULL);
+ }
+ insn->setDef(0, lval);
+
+ for (int k = 1, d = n; insn->defExists(d); ++d, ++k) {
+ insn->setDef(k, insn->getDef(d));
+ insn->setDef(d, NULL);
+ }
+ // carry over predicate if any (mainly for OP_UNION uses)
+ split->setPredicate(insn->cc, insn->getPredicate());
+
+ insn->bb->insertAfter(insn, split);
+ constrList.push_back(split);
+}
+
+void
+RegAlloc::InsertConstraintsPass::condenseSrcs(Instruction *insn,
+ const int a, const int b)
+{
+ uint8_t size = 0;
+ if (a >= b)
+ return;
+ for (int s = a; s <= b; ++s)
+ size += insn->getSrc(s)->reg.size;
+ if (!size)
+ return;
+ LValue *lval = new_LValue(func, FILE_GPR);
+ lval->reg.size = size;
+
+ Value *save[3];
+ insn->takeExtraSources(0, save);
+
+ Instruction *merge = new_Instruction(func, OP_MERGE, typeOfSize(size));
+ merge->setDef(0, lval);
+ for (int s = a, i = 0; s <= b; ++s, ++i) {
+ merge->setSrc(i, insn->getSrc(s));
+ insn->setSrc(s, NULL);
+ }
+ insn->setSrc(a, lval);
+
+ for (int k = a + 1, s = b + 1; insn->srcExists(s); ++s, ++k) {
+ insn->setSrc(k, insn->getSrc(s));
+ insn->setSrc(s, NULL);
+ }
+ insn->bb->insertBefore(insn, merge);
+
+ insn->putExtraSources(0, save);
+
+ constrList.push_back(merge);
+}
+
+void
+RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex)
+{
+ if (isTextureOp(tex->op))
+ textureMask(tex);
+ condenseDefs(tex);
+
+ if (tex->op == OP_SUSTB || tex->op == OP_SUSTP) {
+ condenseSrcs(tex, 3, (3 + typeSizeof(tex->dType) / 4) - 1);
+ } else
+ if (isTextureOp(tex->op)) {
+ int n = tex->srcCount(0xff, true);
+ if (n > 4) {
+ condenseSrcs(tex, 0, 3);
+ if (n > 5) // NOTE: first call modified positions already
+ condenseSrcs(tex, 4 - (4 - 1), n - 1 - (4 - 1));
+ } else
+ if (n > 1) {
+ condenseSrcs(tex, 0, n - 1);
+ }
+ }
+}
+
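+// NVC0 texture instructions take their sources as up to two consecutive
+// register tuples: the s address/resource arguments first, then the n
+// remaining ones (note the positions shift after the first condenseSrcs).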
+void
+RegAlloc::InsertConstraintsPass::texConstraintNVC0(TexInstruction *tex)
+{
+ int n, s;
+
+ textureMask(tex);
+
+ if (tex->op == OP_TXQ) {
+ s = tex->srcCount(0xff);
+ n = 0;
+ } else {
+ s = tex->tex.target.getArgCount();
+ if (!tex->tex.target.isArray() &&
+ (tex->tex.rIndirectSrc >= 0 || tex->tex.sIndirectSrc >= 0))
+ ++s;
+ if (tex->op == OP_TXD && tex->tex.useOffsets)
+ ++s;
+ n = tex->srcCount(0xff) - s;
+ assert(n <= 4);
+ }
+
+ if (s > 1)
+ condenseSrcs(tex, 0, s - 1);
+ if (n > 1) // NOTE: first call modified positions already
+ condenseSrcs(tex, 1, n);
+
+ condenseDefs(tex);
+}
+
+void
+RegAlloc::InsertConstraintsPass::texConstraintNV50(TexInstruction *tex)
+{
+ Value *pred = tex->getPredicate();
+ if (pred)
+ tex->setPredicate(tex->cc, NULL);
+
+ textureMask(tex);
+
+ assert(tex->defExists(0) && tex->srcExists(0));
+ // make src and def count match
+ int c;
+ for (c = 0; tex->srcExists(c) || tex->defExists(c); ++c) {
+ if (!tex->srcExists(c))
+ tex->setSrc(c, new_LValue(func, tex->getSrc(0)->asLValue()));
+ if (!tex->defExists(c))
+ tex->setDef(c, new_LValue(func, tex->getDef(0)->asLValue()));
+ }
+ if (pred)
+ tex->setPredicate(tex->cc, pred);
+ condenseDefs(tex);
+ condenseSrcs(tex, 0, c - 1);
+}
+
+// Insert constraint markers for instructions whose multiple sources must be
+// located in consecutive registers.
+bool
+RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
+{
+ TexInstruction *tex;
+ Instruction *next;
+ int s, size;
+
+ targ = bb->getProgram()->getTarget();
+
+ for (Instruction *i = bb->getEntry(); i; i = next) {
+ next = i->next;
+
+ if ((tex = i->asTex())) {
+ switch (targ->getChipset() & ~0xf) {
+ case 0x50:
+ case 0x80:
+ case 0x90:
+ case 0xa0:
+ texConstraintNV50(tex);
+ break;
+ case 0xc0:
+ case 0xd0:
+ texConstraintNVC0(tex);
+ break;
+ case 0xe0:
+ case NVISA_GK110_CHIPSET:
+ texConstraintNVE0(tex);
+ break;
+ default:
+ break;
+ }
+ } else
+ if (i->op == OP_EXPORT || i->op == OP_STORE) {
+ for (size = typeSizeof(i->dType), s = 1; size > 0; ++s) {
+ assert(i->srcExists(s));
+ size -= i->getSrc(s)->reg.size;
+ }
+ condenseSrcs(i, 1, s - 1);
+ } else
+ if (i->op == OP_LOAD || i->op == OP_VFETCH) {
+ condenseDefs(i);
+ if (i->src(0).isIndirect(0) && typeSizeof(i->dType) >= 8)
+ addHazard(i, i->src(0).getIndirect(0));
+ } else
+ if (i->op == OP_UNION ||
+ i->op == OP_MERGE ||
+ i->op == OP_SPLIT) {
+ constrList.push_back(i);
+ }
+ }
+ return true;
+}
+
+// Insert extra moves so that, if multiple register constraints on a value are
+// in conflict, these conflicts can be resolved.
+bool
+RegAlloc::InsertConstraintsPass::insertConstraintMoves()
+{
+ for (std::list<Instruction *>::iterator it = constrList.begin();
+ it != constrList.end();
+ ++it) {
+ Instruction *cst = *it;
+ Instruction *mov;
+
+ if (cst->op == OP_SPLIT && 0) {
+ // spilling splits is annoying, just make sure they're separate
+ for (int d = 0; cst->defExists(d); ++d) {
+ if (!cst->getDef(d)->refCount())
+ continue;
+ LValue *lval = new_LValue(func, cst->def(d).getFile());
+ const uint8_t size = cst->def(d).getSize();
+ lval->reg.size = size;
+
+ mov = new_Instruction(func, OP_MOV, typeOfSize(size));
+ mov->setSrc(0, lval);
+ mov->setDef(0, cst->getDef(d));
+ cst->setDef(d, mov->getSrc(0));
+ cst->bb->insertAfter(cst, mov);
+
+ cst->getSrc(0)->asLValue()->noSpill = 1;
+ mov->getSrc(0)->asLValue()->noSpill = 1;
+ }
+ } else
+ if (cst->op == OP_MERGE || cst->op == OP_UNION) {
+ for (int s = 0; cst->srcExists(s); ++s) {
+ const uint8_t size = cst->src(s).getSize();
+
+ if (!cst->getSrc(s)->defs.size()) {
+ mov = new_Instruction(func, OP_NOP, typeOfSize(size));
+ mov->setDef(0, cst->getSrc(s));
+ cst->bb->insertBefore(cst, mov);
+ continue;
+ }
+ assert(cst->getSrc(s)->defs.size() == 1); // still SSA
+
+ Instruction *defi = cst->getSrc(s)->defs.front()->getInsn();
+ // catch some cases where we don't really need MOVs
+ if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs())
+ continue;
+
+ LValue *lval = new_LValue(func, cst->src(s).getFile());
+ lval->reg.size = size;
+
+ mov = new_Instruction(func, OP_MOV, typeOfSize(size));
+ mov->setDef(0, lval);
+ mov->setSrc(0, cst->getSrc(s));
+ cst->setSrc(s, mov->getDef(0));
+ cst->bb->insertBefore(cst, mov);
+
+ cst->getDef(0)->asLValue()->noSpill = 1; // doesn't help
+
+ if (cst->op == OP_UNION)
+ mov->setPredicate(defi->cc, defi->getPredicate());
+ }
+ }
+ }
+
+ return true;
+}
+
+} // namespace nv50_ir