summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Connor Abbott <cwabbott0@gmail.com> 2018-01-08 19:25:01 -0500
committer: Marge Bot <emma+marge@anholt.net> 2022-06-30 11:35:56 +0000
commit: c86563c29efaa3598c36d31bdb4015f0d88ec970 (patch)
tree: 3182ee0584e64c201cbcab305631175db511bc19
parent: 6d145df51f74b8a3fc79e07b6b5e21495a99d1df (diff)
nv50/ir/ra: Fix copying compound for moves
In order to reduce moves when coalescing multiple registers into a larger register, RA will try to coalesce MERGE instructions with their definitions. For example, for something like this in GLSL:

    uint a = ...;
    uint b = ...;
    uint64 x = packUint2x32(a, b);

The compiler will try to coalesce x with a and b, in the same way as something like:

    uint a = ...;
    uint b = ...;
    ...
    uint x = phi(a, b);

with the crucial difference that the definitions of a and b only clobber part of the register, instead of the whole thing. This information is carried through the compound flag and compMask bitmask. If compound is set, then the value has been coalesced in such a way that not all the defs clobber the entire register. The compMask bitmask describes which subregister each def clobbers, although it does it in a slightly convoluted way. It's an invariant that once compound is set on one def, it must be set for all the defs in a given coalesced value.

In more detail, the constraints pass will first create extra moves:

    uint a = ...;
    uint b = ...;
    uint a' = a;
    uint b' = b;
    uint64 x = packUint2x32(a', b');

and then RA will merge values involved in MERGE/SPLIT instructions, merging x with a' and b' and making the combined value compound -- this is relatively simple, and will always succeed since we just created a' and b', so they never interfere with x, and x has no other definitions, since we haven't started coalescing moves yet. Basically, we just replaced the MERGE instruction with an equivalent sequence of partial writes to the destination.

The tricky part comes when we try to merge a' with a and b' with b. We need to transfer the compound information from a' to a and b' to b, which copyCompound() does, but we also need to transfer it to any defs coalesced with a and b, which the code failed to do.
Similarly, if x is the argument to a phi instruction, then when we try to merge it with other arguments to the same phi by coalescing moves, we'd have problems guaranteeing that all the other merged defs stay up-to-date.

One tricky part of fixing this is that in order to properly propagate the information from a' to a, we need to do it before the defs for a and a' are merged in coalesceValues(), since we need to know which defs are merged with a but not a' -- after coalesceValues() returns, all the defs have been combined, so we don't know which is which. I took the approach of calling copyCompound() inside coalesceValues(), instead of afterwards.

v2: (mhenning) This now loops over mergedDefs in copyCompound, to update it for changes made in bcf6a9ec

Cc: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: Karol Herbst <kherbst@redhat.com>
Tested-by: Karol Herbst <kherbst@redhat.com>
Reviewed-by: M Henning <drawoc@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17115>
-rw-r--r-- src/nouveau/codegen/nv50_ir_ra.cpp 60
1 files changed, 39 insertions, 21 deletions
diff --git a/src/nouveau/codegen/nv50_ir_ra.cpp b/src/nouveau/codegen/nv50_ir_ra.cpp
index 92bd5a67372..ea45ff751a2 100644
--- a/src/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/nouveau/codegen/nv50_ir_ra.cpp
@@ -823,6 +823,7 @@ private:
void simplifyEdge(RIG_Node *, RIG_Node *);
void simplifyNode(RIG_Node *);
+ void copyCompound(Value *dst, Value *src);
bool coalesceValues(Value *, Value *, bool force);
void resolveSplitsAndMerges();
void makeCompound(Instruction *, bool isSplit);
@@ -955,6 +956,34 @@ GCRA::RIG_Node::init(const RegisterSet& regs, LValue *lval)
livei.insert(lval->livei);
}
+// Used when coalescing moves. The non-compound value will become one, e.g.:
+// mov b32 $r0 $r2 / merge b64 $r0d { $r0 $r1 }
+// split b64 { $r0 $r1 } $r0d / mov b64 $r0d f64 $r2d
+void
+GCRA::copyCompound(Value *dst, Value *src)
+{
+ LValue *ldst = dst->asLValue();
+ LValue *lsrc = src->asLValue();
+
+ if (ldst->compound && !lsrc->compound) {
+ LValue *swap = lsrc;
+ lsrc = ldst;
+ ldst = swap;
+ }
+
+ assert(!ldst->compound);
+
+ if (lsrc->compound) {
+ for (ValueDef *d : mergedDefs(ldst->join)) {
+ LValue *ldst = d->get()->asLValue();
+ if (!ldst->compound)
+ ldst->compMask = 0xff;
+ ldst->compound = 1;
+ ldst->compMask &= lsrc->compMask;
+ }
+ }
+}
+
bool
GCRA::coalesceValues(Value *dst, Value *src, bool force)
{
@@ -997,9 +1026,16 @@ GCRA::coalesceValues(Value *dst, Value *src, bool force)
if (!force && nRep->livei.overlaps(nVal->livei))
return false;
+ // TODO: Handle this case properly.
+ if (!force && rep->compound && val->compound)
+ return false;
+
INFO_DBG(prog->dbgFlags, REG_ALLOC, "joining %%%i($%i) <- %%%i\n",
rep->id, rep->reg.data.id, val->id);
+ if (!force)
+ copyCompound(dst, src);
+
// set join pointer of all values joined with val
const std::list<ValueDef *> &defs = mergedDefs(val);
for (ValueDef *def : defs)
@@ -1067,24 +1103,6 @@ static inline uint8_t makeCompMask(int compSize, int base, int size)
}
}
-// Used when coalescing moves. The non-compound value will become one, e.g.:
-// mov b32 $r0 $r2 / merge b64 $r0d { $r0 $r1 }
-// split b64 { $r0 $r1 } $r0d / mov b64 $r0d f64 $r2d
-static inline void copyCompound(Value *dst, Value *src)
-{
- LValue *ldst = dst->asLValue();
- LValue *lsrc = src->asLValue();
-
- if (ldst->compound && !lsrc->compound) {
- LValue *swap = lsrc;
- lsrc = ldst;
- ldst = swap;
- }
-
- ldst->compound = lsrc->compound;
- ldst->compMask = lsrc->compMask;
-}
-
void
GCRA::makeCompound(Instruction *insn, bool split)
{
@@ -1170,8 +1188,7 @@ GCRA::doCoalesce(ArrayList& insns, unsigned int mask)
break;
i = insn->getSrc(0)->getUniqueInsn();
if (i && !i->constrainedDefs()) {
- if (coalesceValues(insn->getDef(0), insn->getSrc(0), false))
- copyCompound(insn->getSrc(0), insn->getDef(0));
+ coalesceValues(insn->getDef(0), insn->getSrc(0), false);
}
break;
case OP_TEX:
@@ -2617,7 +2634,8 @@ RegAlloc::InsertConstraintsPass::insertConstraintMove(Instruction *cst, int s)
defi->src(0).getFile() == FILE_MEMORY_CONST &&
!defi->src(0).isIndirect(0);
// catch some cases where don't really need MOVs
- if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs()) {
+ if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs()
+ && defi->op != OP_MERGE && defi->op != OP_SPLIT) {
if (imm || load) {
// Move the defi right before the cst. No point in expanding
// the range.