summary | refs | log | tree | commit | diff
path: root/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
diff options
context:
space:
mode:
author Matt Turner <mattst88@gmail.com> 2013-10-20 11:38:17 -0700
committer Matt Turner <mattst88@gmail.com> 2013-10-30 19:49:27 -0700
commit b16b3c8703f198ca0f025b730d582600df79c19c (patch)
tree 1b4aa59b1807f9ca3ee4f2d054f26cbe61a2070e /src/mesa/drivers/dri/i965/brw_fs_cse.cpp
parent 219b43c612b2882e0bf82ac1a12ff073a42be6e1 (diff)
i965/fs: Perform CSE on CMP(N) instructions.

Optimizes

   cmp.ge.f0(8) null g45<8,8,1>F 0F
   (+f0) sel(8) g50<1>F g40<8,8,1>F g10<8,8,1>F
   cmp.ge.f0(8) null g45<8,8,1>F 0F
   (+f0) sel(8) g51<1>F g41<8,8,1>F g11<8,8,1>F
   cmp.ge.f0(8) null g45<8,8,1>F 0F
   (+f0) sel(8) g52<1>F g42<8,8,1>F g12<8,8,1>F
   cmp.ge.f0(8) null g45<8,8,1>F 0F
   (+f0) sel(8) g53<1>F g43<8,8,1>F g13<8,8,1>F

into

   cmp.ge.f0(8) null g45<8,8,1>F 0F
   (+f0) sel(8) g50<1>F g40<8,8,1>F g10<8,8,1>F
   (+f0) sel(8) g51<1>F g41<8,8,1>F g11<8,8,1>F
   (+f0) sel(8) g52<1>F g42<8,8,1>F g12<8,8,1>F
   (+f0) sel(8) g53<1>F g43<8,8,1>F g13<8,8,1>F

total instructions in shared programs: 1644938 -> 1638181 (-0.41%)
instructions in affected programs: 574955 -> 568198 (-1.18%)

Two more 16-wide programs (in L4D2). Some large (-9%) decreases in
instruction count in some of Valve's Source Engine games. No
regressions.

Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Paul Berry <stereotype441@gmail.com>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs_cse.cpp')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_cse.cpp 39
1 files changed, 29 insertions, 10 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 3f59339f71..47938744cf 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -54,6 +54,8 @@ is_expression(const fs_inst *const inst)
case BRW_OPCODE_SHR:
case BRW_OPCODE_SHL:
case BRW_OPCODE_ASR:
+ case BRW_OPCODE_CMP:
+ case BRW_OPCODE_CMPN:
case BRW_OPCODE_ADD:
case BRW_OPCODE_MUL:
case BRW_OPCODE_FRC:
@@ -102,6 +104,18 @@ operands_match(enum opcode op, fs_reg *xs, fs_reg *ys)
}
}
+static bool
+instructions_match(fs_inst *a, fs_inst *b)
+{
+ return a->opcode == b->opcode &&
+ a->saturate == b->saturate &&
+ a->predicate == b->predicate &&
+ a->predicate_inverse == b->predicate_inverse &&
+ a->conditional_mod == b->conditional_mod &&
+ a->dst.type == b->dst.type &&
+ operands_match(a->opcode, a->src, b->src);
+}
+
bool
fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
{
@@ -115,11 +129,7 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
inst = (fs_inst *) inst->next) {
/* Skip some cases. */
- if (is_expression(inst) &&
- !inst->predicate &&
- !inst->is_partial_write() &&
- !inst->conditional_mod &&
- inst->dst.file != HW_REG)
+ if (is_expression(inst) && !inst->is_partial_write())
{
bool found = false;
@@ -128,11 +138,7 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
entry = (aeb_entry *) entry_node;
/* Match current instruction's expression against those in AEB. */
- if (inst->opcode == entry->generator->opcode &&
- inst->saturate == entry->generator->saturate &&
- inst->dst.type == entry->generator->dst.type &&
- operands_match(inst->opcode, entry->generator->src, inst->src)) {
-
+ if (instructions_match(inst, entry->generator)) {
found = true;
progress = true;
break;
@@ -208,6 +214,19 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
foreach_list_safe(entry_node, aeb) {
aeb_entry *entry = (aeb_entry *)entry_node;
+ /* Kill all AEB entries that write a different value to or read from
+ * the flag register if we just wrote it.
+ */
+ if (inst->writes_flag()) {
+ if (entry->generator->reads_flag() ||
+ (entry->generator->writes_flag() &&
+ !instructions_match(inst, entry->generator))) {
+ entry->remove();
+ ralloc_free(entry);
+ continue;
+ }
+ }
+
for (int i = 0; i < 3; i++) {
fs_reg *src_reg = &entry->generator->src[i];