Diffstat (limited to 'src/intel/compiler/brw_ir_performance.cpp')
 src/intel/compiler/brw_ir_performance.cpp | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
diff --git a/src/intel/compiler/brw_ir_performance.cpp b/src/intel/compiler/brw_ir_performance.cpp
index 8d02db932ba..3c39594d121 100644
--- a/src/intel/compiler/brw_ir_performance.cpp
+++ b/src/intel/compiler/brw_ir_performance.cpp
@@ -1522,9 +1522,19 @@ namespace {
* difference is the worst-case scenario branch_weight used for
* SIMD32 which accounts for the possibility of a dynamically
* uniform branch becoming divergent in SIMD32.
+ *
+ * Note that we provide slightly more pessimistic weights on
+ * Gen12+ for SIMD32, since the effective warp size on that
+ * platform is 2x the SIMD width due to EU fusion, which increases
+ * the likelihood of divergent control flow in comparison to
+ * previous generations, giving narrower SIMD modes a performance
+ * advantage in several test-cases with non-uniform discard jumps.
*/
const float branch_weight = (dispatch_width > 16 ? 1.0 : 0.5);
+ const float discard_weight = (dispatch_width > 16 || s->devinfo->gen < 12 ?
+ 1.0 : 0.5);
const float loop_weight = 10;
+ unsigned discard_count = 0;
unsigned elapsed = 0;
state st;
@@ -1538,6 +1548,8 @@ namespace {
if (inst->opcode == BRW_OPCODE_ENDIF)
st.weight /= branch_weight;
+ else if (inst->opcode == FS_OPCODE_PLACEHOLDER_HALT && discard_count)
+ st.weight /= discard_weight;
elapsed += (st.unit_ready[unit_fe] - clock0) * st.weight;
@@ -1547,6 +1559,8 @@ namespace {
st.weight *= loop_weight;
else if (inst->opcode == BRW_OPCODE_WHILE)
st.weight /= loop_weight;
+ else if (inst->opcode == FS_OPCODE_DISCARD_JUMP && !discard_count++)
+ st.weight *= discard_weight;
}
p.block_latency[block->num] = elapsed - elapsed0;
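
The hunks above estimate divergence cost by scaling each instruction's front-end cost with a running weight: an instruction that opens a potentially divergent region (IF, DISCARD_JUMP) multiplies the weight, and the matching merge point (ENDIF, PLACEHOLDER_HALT) divides it back out before its own cost is accumulated. Below is a minimal, self-contained sketch of that weighting mechanic only, not the Mesa implementation: the opcode enum, the estimate_block() helper and its dispatch_width/gen parameters are hypothetical stand-ins for the state the real pass reads from the shader and s->devinfo.

/* Minimal sketch of the weight-based divergence heuristic (illustration
 * only; opcode names and estimate_block() are hypothetical). */
#include <cstdio>

enum opcode { OP_IF, OP_ENDIF, OP_DISCARD_JUMP, OP_PLACEHOLDER_HALT, OP_ALU };

struct inst {
   opcode op;
   unsigned cycles;   /* stand-in for the issue cost computed by the pass */
};

static float
estimate_block(const inst *insts, unsigned n,
               unsigned dispatch_width, unsigned gen)
{
   /* Same weight expressions as in the diff above. */
   const float branch_weight = (dispatch_width > 16 ? 1.0f : 0.5f);
   const float discard_weight = (dispatch_width > 16 || gen < 12 ? 1.0f : 0.5f);
   unsigned discard_count = 0;
   float weight = 1.0f;
   float elapsed = 0.0f;

   for (unsigned i = 0; i < n; i++) {
      /* Merge points restore the weight before their own cost is counted. */
      if (insts[i].op == OP_ENDIF)
         weight /= branch_weight;
      else if (insts[i].op == OP_PLACEHOLDER_HALT && discard_count)
         weight /= discard_weight;

      elapsed += insts[i].cycles * weight;

      /* Divergent jumps scale the weight of everything up to the merge. */
      if (insts[i].op == OP_IF)
         weight *= branch_weight;
      else if (insts[i].op == OP_DISCARD_JUMP && !discard_count++)
         weight *= discard_weight;
   }
   return elapsed;
}

int
main()
{
   const inst body[] = {
      { OP_ALU, 4 },
      { OP_DISCARD_JUMP, 2 },
      { OP_ALU, 8 },                /* weighted by discard_weight */
      { OP_PLACEHOLDER_HALT, 2 },
      { OP_ALU, 4 },
   };
   const unsigned n = sizeof(body) / sizeof(body[0]);

   printf("SIMD16, Gen12: %.1f weighted cycles\n", estimate_block(body, n, 16, 12));
   printf("SIMD32, Gen12: %.1f weighted cycles\n", estimate_block(body, n, 32, 12));
   return 0;
}

With these example costs the SIMD16 call prints 16.0 weighted cycles and the SIMD32 call prints 20.0 for the same instruction sequence on a Gen12-like device, which is the effect the new comment describes: the pessimistic SIMD32 discard weight gives narrower SIMD modes an advantage in the model when non-uniform discard jumps are present.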