summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
diff options
context:
space:
mode:
authorIlia Mirkin <imirkin@alum.mit.edu>2017-11-15 23:32:16 -0500
committerIlia Mirkin <imirkin@alum.mit.edu>2017-11-26 01:10:19 -0500
commit3072bbef63c9a41929cfd781a583de9689ec5b65 (patch)
tree00df7897883e6e72c2ffa69ae97017f055926c31 /src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
parent50e913b9c5d311334281da89b4e9969d48fd62b6 (diff)
nv50/ir: when merging immediates/consts, load directly
When a MERGE operation gets its constraint moves added, it susbstantially extends live ranges to be reusing an immediate from earlier in the program (not to mention the silliness of loading an immediate into a register, and then moving into another register). We detect these scenarios and insert moves that take the immediate or constbuf load directly into the register. If it's the last use, then we can just move that operation to the closer location. With SM35 (255 regs) we get these results: total instructions in shared programs : 6583670 -> 6580681 (-0.05%) total gprs used in shared programs : 950818 -> 944261 (-0.69%) total shared used in shared programs : 0 -> 0 (0.00%) total local used in shared programs : 15328 -> 15328 (0.00%) total bytes used in shared programs : 60367456 -> 60339896 (-0.05%) local shared gpr inst bytes helped 0 0 4584 3186 3186 hurt 0 0 55 968 968 I suspect they will be better for SM20 and SM30. Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Diffstat (limited to 'src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp')
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp22
1 files changed, 21 insertions, 1 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index b33d7b4010d..3a0e56e1385 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -2331,9 +2331,21 @@ RegAlloc::InsertConstraintsPass::insertConstraintMoves()
assert(cst->getSrc(s)->defs.size() == 1); // still SSA
Instruction *defi = cst->getSrc(s)->defs.front()->getInsn();
+ bool imm = defi->op == OP_MOV &&
+ defi->src(0).getFile() == FILE_IMMEDIATE;
+ bool load = defi->op == OP_LOAD &&
+ defi->src(0).getFile() == FILE_MEMORY_CONST &&
+ !defi->src(0).isIndirect(0);
// catch some cases where don't really need MOVs
- if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs())
+ if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs()) {
+ if (imm || load) {
+ // Move the defi right before the cst. No point in expanding
+ // the range.
+ defi->bb->remove(defi);
+ cst->bb->insertBefore(cst, defi);
+ }
continue;
+ }
LValue *lval = new_LValue(func, cst->src(s).getFile());
lval->reg.size = size;
@@ -2341,6 +2353,14 @@ RegAlloc::InsertConstraintsPass::insertConstraintMoves()
mov = new_Instruction(func, OP_MOV, typeOfSize(size));
mov->setDef(0, lval);
mov->setSrc(0, cst->getSrc(s));
+
+ if (load) {
+ mov->op = OP_LOAD;
+ mov->setSrc(0, defi->getSrc(0));
+ } else if (imm) {
+ mov->setSrc(0, defi->getSrc(0));
+ }
+
cst->setSrc(s, mov->getDef(0));
cst->bb->insertBefore(cst, mov);