summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRhys Perry <pendingchaos02@gmail.com>2021-11-12 13:46:17 +0000
committerEric Engestrom <eric@engestrom.ch>2021-11-17 20:06:22 +0000
commitdc7bccdb545c60a11743fb37f4671d67d02c7031 (patch)
treeefb726a037d1f719bec2ef8bb193c155817cc874
parent565a14d7edcd8bcd379059756ca09c4081331c2e (diff)
aco: consider pseudo-instructions reading exec in needs_exec_mask()
No matter the format, this should return true if the instruction has an exec operand. Otherwise, eliminate_useless_exec_writes_in_block() could remove an exec write in a block if its successor begins with: s2: %3737:s[8-9] = p_parallelcopy %0:exec s2: %0:exec, s1: %3738:scc = s_wqm_b64 %3737:s[8-9] Totals from 3 (0.00% of 150170) affected shaders (GFX10.3): CodeSize: 23184 -> 23204 (+0.09%) Instrs: 4143 -> 4148 (+0.12%) Latency: 98379 -> 98382 (+0.00%) Copies: 172 -> 175 (+1.74%) Branches: 95 -> 97 (+2.11%) Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Fixes: bc130497472 ("aco: Eliminate useless exec writes in jump threading.") Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5620 Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13776> (cherry picked from commit d89461208b79a0c5f577caf6a98dab6976c12251)
-rw-r--r--.pick_status.json2
-rw-r--r--src/amd/compiler/aco_ir.cpp26
2 files changed, 14 insertions, 14 deletions
diff --git a/.pick_status.json b/.pick_status.json
index bb32bf5abe8..10e43531f20 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -967,7 +967,7 @@
"description": "aco: consider pseudo-instructions reading exec in needs_exec_mask()",
"nominated": true,
"nomination_type": 1,
- "resolution": 0,
+ "resolution": 1,
"main_sha": null,
"because_sha": "bc130497472cb4ec4ec60695ed99b169d6681118"
},
diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp
index dcc85a92e2f..5923fea9589 100644
--- a/src/amd/compiler/aco_ir.cpp
+++ b/src/amd/compiler/aco_ir.cpp
@@ -525,12 +525,18 @@ get_reduction_identity(ReduceOp op, unsigned idx)
bool
needs_exec_mask(const Instruction* instr)
{
- if (instr->isSALU() || instr->isBranch())
+ if (instr->isVALU()) {
+ return instr->opcode != aco_opcode::v_readlane_b32 &&
+ instr->opcode != aco_opcode::v_readlane_b32_e64 &&
+ instr->opcode != aco_opcode::v_writelane_b32 &&
+ instr->opcode != aco_opcode::v_writelane_b32_e64;
+ }
+
+ if (instr->isVMEM() || instr->isFlatLike())
+ return true;
+
+ if (instr->isSALU() || instr->isBranch() || instr->isSMEM() || instr->isBarrier())
return instr->reads_exec();
- if (instr->isSMEM())
- return false;
- if (instr->isBarrier())
- return false;
if (instr->isPseudo()) {
switch (instr->opcode) {
@@ -543,22 +549,16 @@ needs_exec_mask(const Instruction* instr)
if (def.getTemp().type() == RegType::vgpr)
return true;
}
- return false;
+ return instr->reads_exec();
case aco_opcode::p_spill:
case aco_opcode::p_reload:
case aco_opcode::p_logical_start:
case aco_opcode::p_logical_end:
- case aco_opcode::p_startpgm: return false;
+ case aco_opcode::p_startpgm: return instr->reads_exec();
default: break;
}
}
- if (instr->opcode == aco_opcode::v_readlane_b32 ||
- instr->opcode == aco_opcode::v_readlane_b32_e64 ||
- instr->opcode == aco_opcode::v_writelane_b32 ||
- instr->opcode == aco_opcode::v_writelane_b32_e64)
- return false;
-
return true;
}