summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Matt Turner <mattst88@gmail.com> 2014-11-02 20:36:53 -0800
committer: Matt Turner <mattst88@gmail.com> 2014-12-01 16:42:13 -0800
commit: 5df88c2096281f416b2738debac1c4c329e29673 (patch)
tree: c6d02d8dc2c34baa1b7c687875244a2a5db323d5
parent: 7a5cc789def94af7e5c364cce7b0884eee2bcc6b (diff)
i965/vec4: Rewrite dead code elimination to use live in/out.
Improves 359 shaders by >=10%
         114 shaders by >=20%
          91 shaders by >=30%
          82 shaders by >=40%
          22 shaders by >=50%
           4 shaders by >=60%
           2 shaders by >=80%

total instructions in shared programs: 5845346 -> 5822422 (-0.39%)
instructions in affected programs:     364979 -> 342055 (-6.28%)

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
-rw-r--r-- src/mesa/drivers/dri/i965/Makefile.sources | 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4.cpp | 155
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp | 169
3 files changed, 170 insertions, 155 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 26bf458e183..f03422e0498 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -103,6 +103,7 @@ i965_FILES = \
brw_vec4.cpp \
brw_vec4_copy_propagation.cpp \
brw_vec4_cse.cpp \
+ brw_vec4_dead_code_eliminate.cpp \
brw_vec4_generator.cpp \
brw_vec4_gs_visitor.cpp \
brw_vec4_live_variables.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 534b4b0668f..4d893e15dca 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -429,161 +429,6 @@ vec4_visitor::opt_reduce_swizzle()
return progress;
}
-static bool
-try_eliminate_instruction(vec4_instruction *inst, int new_writemask,
- const struct brw_context *brw)
-{
- if (inst->has_side_effects())
- return false;
-
- if (new_writemask == 0) {
- /* Don't dead code eliminate instructions that write to the
- * accumulator as a side-effect. Instead just set the destination
- * to the null register to free it.
- */
- if (inst->writes_accumulator || inst->writes_flag()) {
- inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
- } else {
- inst->opcode = BRW_OPCODE_NOP;
- }
-
- return true;
- } else if (inst->dst.writemask != new_writemask) {
- switch (inst->opcode) {
- case SHADER_OPCODE_TXF_CMS:
- case SHADER_OPCODE_GEN4_SCRATCH_READ:
- case VS_OPCODE_PULL_CONSTANT_LOAD:
- case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
- break;
- default:
- /* Do not set a writemask on Gen6 for math instructions, those are
- * executed using align1 mode that does not support a destination mask.
- */
- if (!(brw->gen == 6 && inst->is_math()) && !inst->is_tex()) {
- inst->dst.writemask = new_writemask;
- return true;
- }
- }
- }
-
- return false;
-}
-
-/**
- * Must be called after calculate_live_intervals() to remove unused
- * writes to registers -- register allocation will fail otherwise
- * because something deffed but not used won't be considered to
- * interfere with other regs.
- */
-bool
-vec4_visitor::dead_code_eliminate()
-{
- bool progress = false;
- int pc = -1;
-
- calculate_live_intervals();
-
- foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
- pc++;
-
- bool inst_writes_flag = false;
- if (inst->dst.file != GRF) {
- if (inst->dst.is_null() && inst->writes_flag()) {
- inst_writes_flag = true;
- } else {
- continue;
- }
- }
-
- if (inst->dst.file == GRF) {
- int write_mask = inst->dst.writemask;
-
- for (int c = 0; c < 4; c++) {
- if (write_mask & (1 << c)) {
- assert(this->virtual_grf_end[inst->dst.reg * 4 + c] >= pc);
- if (this->virtual_grf_end[inst->dst.reg * 4 + c] == pc) {
- write_mask &= ~(1 << c);
- }
- }
- }
-
- progress = try_eliminate_instruction(inst, write_mask, brw) ||
- progress;
- }
-
- if (inst->predicate || inst->prev == NULL)
- continue;
-
- int dead_channels;
- if (inst_writes_flag) {
-/* Arbitrarily chosen, other than not being an xyzw writemask. */
-#define FLAG_WRITEMASK (1 << 5)
- dead_channels = inst->reads_flag() ? 0 : FLAG_WRITEMASK;
- } else {
- dead_channels = inst->dst.writemask;
-
- for (int i = 0; i < 3; i++) {
- if (inst->src[i].file != GRF ||
- inst->src[i].reg != inst->dst.reg)
- continue;
-
- for (int j = 0; j < 4; j++) {
- int swiz = BRW_GET_SWZ(inst->src[i].swizzle, j);
- dead_channels &= ~(1 << swiz);
- }
- }
- }
-
- foreach_inst_in_block_reverse_starting_from(vec4_instruction, scan_inst,
- inst, block) {
- if (dead_channels == 0)
- break;
-
- if (inst_writes_flag) {
- if (scan_inst->dst.is_null() && scan_inst->writes_flag()) {
- scan_inst->opcode = BRW_OPCODE_NOP;
- progress = true;
- continue;
- } else if (scan_inst->reads_flag()) {
- break;
- }
- }
-
- if (inst->dst.file == scan_inst->dst.file &&
- inst->dst.reg == scan_inst->dst.reg &&
- inst->dst.reg_offset == scan_inst->dst.reg_offset) {
- int new_writemask = scan_inst->dst.writemask & ~dead_channels;
-
- progress = try_eliminate_instruction(scan_inst, new_writemask, brw) ||
- progress;
- }
-
- for (int i = 0; i < 3; i++) {
- if (scan_inst->src[i].file != inst->dst.file ||
- scan_inst->src[i].reg != inst->dst.reg)
- continue;
-
- for (int j = 0; j < 4; j++) {
- int swiz = BRW_GET_SWZ(scan_inst->src[i].swizzle, j);
- dead_channels &= ~(1 << swiz);
- }
- }
- }
- }
-
- if (progress) {
- foreach_block_and_inst_safe (block, backend_instruction, inst, cfg) {
- if (inst->opcode == BRW_OPCODE_NOP) {
- inst->remove(block);
- }
- }
-
- invalidate_live_intervals();
- }
-
- return progress;
-}
-
void
vec4_visitor::split_uniform_registers()
{
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
new file mode 100644
index 00000000000..b8370ba63ec
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
@@ -0,0 +1,169 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4.h"
+#include "brw_vec4_live_variables.h"
+#include "brw_cfg.h"
+
+/** @file brw_vec4_dead_code_eliminate.cpp
+ *
+ * Dataflow-aware dead code elimination.
+ *
+ * Walks the instruction list from the bottom, removing instructions that
+ * have results that both aren't used in later blocks and haven't been read
+ * yet in the tail end of this block.
+ */
+
+using namespace brw;
+
+/**
+ * Reports whether \p inst may have individual channels dropped from its
+ * destination writemask.
+ *
+ * Returns false for the SCRATCH_READ / PULL_CONSTANT_LOAD opcodes listed
+ * below, for math instructions on Gen6, and for any instruction for which
+ * is_tex() is true.  Returns true otherwise.
+ */
+static bool
+can_do_writemask(const struct brw_context *brw,
+                 const vec4_instruction *inst)
+{
+   switch (inst->opcode) {
+   case SHADER_OPCODE_GEN4_SCRATCH_READ:
+   case VS_OPCODE_PULL_CONSTANT_LOAD:
+   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
+      return false;
+   default:
+      break;
+   }
+
+   /* The MATH instruction on Gen6 only executes in align1 mode, which does
+    * not support writemasking.
+    */
+   const bool gen6_math = brw->gen == 6 && inst->is_math();
+
+   return !gen6_math && !inst->is_tex();
+}
+
+/**
+ * Removes instructions whose results are never read, and trims dead
+ * channels from the destination writemask of the rest.
+ *
+ * Every block is walked from its last instruction to its first.  The walk
+ * starts from the block's live-out sets and keeps a running per-channel
+ * liveness bitset for GRFs (indexed by reg * 4 + channel) plus one bit for
+ * the flag register, updating both at each instruction.
+ *
+ * \return true if any instruction was modified or removed.
+ */
+bool
+vec4_visitor::dead_code_eliminate()
+{
+   bool progress = false;
+
+   calculate_live_intervals();
+
+   int num_vars = live_intervals->num_vars;
+   BITSET_WORD *live = ralloc_array(NULL, BITSET_WORD, BITSET_WORDS(num_vars));
+   BITSET_WORD *flag_live = ralloc_array(NULL, BITSET_WORD, 1);
+
+   foreach_block(block, cfg) {
+      /* Seed the running liveness state with what is live out of the block. */
+      memcpy(live, live_intervals->block_data[block->num].liveout,
+             sizeof(BITSET_WORD) * BITSET_WORDS(num_vars));
+      memcpy(flag_live, live_intervals->block_data[block->num].flag_liveout,
+             sizeof(BITSET_WORD));
+
+      foreach_inst_in_block_reverse(vec4_instruction, inst, block) {
+         /* Set once this instruction has been turned into a NOP. */
+         bool eliminated = false;
+
+         if (inst->dst.file == GRF && !inst->has_side_effects()) {
+            bool result_live[4] = { false };
+
+            for (int c = 0; c < 4; c++) {
+               int var = inst->dst.reg * 4 + c;
+               result_live[c] = BITSET_TEST(live, var);
+            }
+
+            /* If the instruction can't do writemasking, then it's all or
+             * nothing.
+             */
+            if (!can_do_writemask(brw, inst)) {
+               bool result = result_live[0] | result_live[1] |
+                             result_live[2] | result_live[3];
+               result_live[0] = result;
+               result_live[1] = result;
+               result_live[2] = result;
+               result_live[3] = result;
+            }
+
+            for (int c = 0; c < 4; c++) {
+               if (!result_live[c] && inst->dst.writemask & (1 << c)) {
+                  inst->dst.writemask &= ~(1 << c);
+                  progress = true;
+
+                  if (inst->dst.writemask == 0) {
+                     /* Don't eliminate instructions that also write the
+                      * accumulator or the flag register: only the GRF result
+                      * is known to be dead here.  Point the destination at
+                      * the null register instead.  A flag-only writer is then
+                      * handled by the flag liveness test below.
+                      */
+                     if (inst->writes_accumulator || inst->writes_flag()) {
+                        inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
+                     } else {
+                        inst->opcode = BRW_OPCODE_NOP;
+                        eliminated = true;
+                        break;
+                     }
+                  }
+               }
+            }
+         }
+
+         /* A removed instruction neither defines nor uses anything, so the
+          * liveness state before it equals the state after it.  Skipping the
+          * update keeps its sources from being marked live, so their
+          * producers can be eliminated in this same walk.
+          */
+         if (eliminated)
+            continue;
+
+         if (inst->dst.is_null() && inst->writes_flag()) {
+            if (!BITSET_TEST(flag_live, 0)) {
+               inst->opcode = BRW_OPCODE_NOP;
+               progress = true;
+               continue;
+            }
+         }
+
+         /* A predicated write may leave the old value in place, so it does
+          * not end the liveness of the channels it writes.
+          */
+         if (inst->dst.file == GRF && !inst->predicate) {
+            for (int c = 0; c < 4; c++) {
+               if (inst->dst.writemask & (1 << c)) {
+                  int var = inst->dst.reg * 4 + c;
+                  BITSET_CLEAR(live, var);
+               }
+            }
+         }
+
+         if (inst->writes_flag()) {
+            BITSET_CLEAR(flag_live, 0);
+         }
+
+         /* Every channel selected by a GRF source's swizzle is read. */
+         for (int i = 0; i < 3; i++) {
+            if (inst->src[i].file == GRF) {
+               for (int c = 0; c < 4; c++) {
+                  int swiz = BRW_GET_SWZ(inst->src[i].swizzle, c);
+                  int var = inst->src[i].reg * 4 + swiz;
+
+                  BITSET_SET(live, var);
+               }
+            }
+         }
+
+         if (inst->reads_flag()) {
+            BITSET_SET(flag_live, 0);
+         }
+      }
+   }
+
+   ralloc_free(live);
+   ralloc_free(flag_live);
+
+   if (progress) {
+      /* Sweep out everything that was marked as a NOP above. */
+      foreach_block_and_inst_safe(block, backend_instruction, inst, cfg) {
+         if (inst->opcode == BRW_OPCODE_NOP) {
+            inst->remove(block);
+         }
+      }
+
+      invalidate_live_intervals();
+   }
+
+   return progress;
+}