summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jason Ekstrand <jason@jlekstrand.net> 2019-09-24 17:06:12 -0500
committer Dylan Baker <dylan@pnwbakers.com> 2019-09-30 09:10:02 -0700
commit e6edeebd157c791e6239076e4c136fe90af46e1e (patch)
tree e4aa69d62602693e7a4b5aed1314e4d1698283db
parent 2dbf10ba3d129d59e1a6c0e46b24c20629b277c6 (diff)
intel/fs: Fix fs_inst::flags_read for ANY/ALL predicates
Without this, we were DCEing flag writes: we assumed their results were unused because we didn't understand that an ANY32 predicate actually reads all the flags.

Fixes: df1aec763eb "i965/fs: Define methods to calculate the flag..."
Reviewed-by: Matt Turner <mattst88@gmail.com>
(cherry picked from commit 6c858b9a915b54d127a64ab817f111a82716a037)
-rw-r--r-- src/intel/compiler/brw_fs.cpp 34
1 files changed, 28 insertions, 6 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 083cd2cba10..e064de5c23d 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -1011,15 +1011,37 @@ fs_inst::size_read(int arg) const
}
namespace {
+ unsigned
+ predicate_width(brw_predicate predicate)
+ {
+ switch (predicate) {
+ case BRW_PREDICATE_NONE: return 1;
+ case BRW_PREDICATE_NORMAL: return 1;
+ case BRW_PREDICATE_ALIGN1_ANY2H: return 2;
+ case BRW_PREDICATE_ALIGN1_ALL2H: return 2;
+ case BRW_PREDICATE_ALIGN1_ANY4H: return 4;
+ case BRW_PREDICATE_ALIGN1_ALL4H: return 4;
+ case BRW_PREDICATE_ALIGN1_ANY8H: return 8;
+ case BRW_PREDICATE_ALIGN1_ALL8H: return 8;
+ case BRW_PREDICATE_ALIGN1_ANY16H: return 16;
+ case BRW_PREDICATE_ALIGN1_ALL16H: return 16;
+ case BRW_PREDICATE_ALIGN1_ANY32H: return 32;
+ case BRW_PREDICATE_ALIGN1_ALL32H: return 32;
+ default: unreachable("Unsupported predicate");
+ }
+ }
+
/* Return the subset of flag registers that an instruction could
* potentially read or write based on the execution controls and flag
* subregister number of the instruction.
*/
unsigned
- flag_mask(const fs_inst *inst)
+ flag_mask(const fs_inst *inst, unsigned width)
{
- const unsigned start = inst->flag_subreg * 16 + inst->group;
- const unsigned end = start + inst->exec_size;
+ assert(util_is_power_of_two_nonzero(width));
+ const unsigned start = (inst->flag_subreg * 16 + inst->group) &
+ ~(width - 1);
+ const unsigned end = start + ALIGN(inst->exec_size, width);
return ((1 << DIV_ROUND_UP(end, 8)) - 1) & ~((1 << (start / 8)) - 1);
}
@@ -1051,9 +1073,9 @@ fs_inst::flags_read(const gen_device_info *devinfo) const
* f0.0 and f1.0 on Gen7+, and f0.0 and f0.1 on older hardware.
*/
const unsigned shift = devinfo->gen >= 7 ? 4 : 2;
- return flag_mask(this) << shift | flag_mask(this);
+ return flag_mask(this, 1) << shift | flag_mask(this, 1);
} else if (predicate) {
- return flag_mask(this);
+ return flag_mask(this, predicate_width(predicate));
} else {
unsigned mask = 0;
for (int i = 0; i < sources; i++) {
@@ -1072,7 +1094,7 @@ fs_inst::flags_written() const
opcode != BRW_OPCODE_WHILE)) ||
opcode == SHADER_OPCODE_FIND_LIVE_CHANNEL ||
opcode == FS_OPCODE_FB_WRITE) {
- return flag_mask(this);
+ return flag_mask(this, 1);
} else {
return flag_mask(dst, size_written);
}