summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
diff options
context:
space:
mode:
author: Eric Anholt <eric@anholt.net> 2013-03-15 14:43:28 -0700
committer: Eric Anholt <eric@anholt.net> 2013-04-01 16:17:25 -0700
commit: 9f43b8492818bab47ef9cc489b91c2618446a3e9 (patch)
tree: 8bdf704ad0fc4a8eb9f3c8c7fc15cab30e4e3829 /src/mesa/drivers/dri/i965/brw_fs_cse.cpp
parent: dca5fc14358a8b267b3854c39c976a822885898f (diff)
i965/fs: Do CSE on gen7's varying-index pull constant loads.
This is our first CSE on a regs_written() > 1 instruction, so it takes a bit of extra fixup.
Reduces the number of loads on kwin's Lanczos shader from 12 to 2.

v2: Fix compiler warning (false positive on possibly-uninitialized variable).

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=61554
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> (v1)
NOTE: This is a candidate for the 9.1 branch.
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs_cse.cpp')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_cse.cpp 43
1 files changed, 32 insertions, 11 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 02642c91a6..5a50d45ddc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -68,6 +68,7 @@ is_expression(const fs_inst *const inst)
case BRW_OPCODE_MAD:
case BRW_OPCODE_LRP:
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
+ case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
case FS_OPCODE_CINTERP:
case FS_OPCODE_LINTERP:
return true;
@@ -129,21 +130,41 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
*/
bool no_existing_temp = entry->tmp.file == BAD_FILE;
if (no_existing_temp) {
- entry->tmp = fs_reg(this, glsl_type::float_type);
- entry->tmp.type = inst->dst.type;
-
- fs_inst *copy = new(ralloc_parent(inst))
- fs_inst(BRW_OPCODE_MOV, entry->generator->dst, entry->tmp);
- entry->generator->insert_after(copy);
- entry->generator->dst = entry->tmp;
+ int written = entry->generator->regs_written();
+
+ fs_reg orig_dst = entry->generator->dst;
+ fs_reg tmp = fs_reg(GRF, virtual_grf_alloc(written),
+ orig_dst.type);
+ entry->tmp = tmp;
+ entry->generator->dst = tmp;
+
+ for (int i = 0; i < written; i++) {
+ fs_inst *copy = MOV(orig_dst, tmp);
+ copy->force_writemask_all =
+ entry->generator->force_writemask_all;
+ entry->generator->insert_after(copy);
+
+ orig_dst.reg_offset++;
+ tmp.reg_offset++;
+ }
}
/* dest <- temp */
+ int written = inst->regs_written();
+ assert(written == entry->generator->regs_written());
assert(inst->dst.type == entry->tmp.type);
- fs_inst *copy = new(ralloc_parent(inst))
- fs_inst(BRW_OPCODE_MOV, inst->dst, entry->tmp);
- copy->force_writemask_all = inst->force_writemask_all;
- inst->replace_with(copy);
+ fs_reg dst = inst->dst;
+ fs_reg tmp = entry->tmp;
+ fs_inst *copy = NULL;
+ for (int i = 0; i < written; i++) {
+ copy = MOV(dst, tmp);
+ copy->force_writemask_all = inst->force_writemask_all;
+ inst->insert_before(copy);
+
+ dst.reg_offset++;
+ tmp.reg_offset++;
+ }
+ inst->remove();
/* Appending an instruction may have changed our bblock end. */
if (inst == block->end) {