diff options
author | Rhys Perry <pendingchaos02@gmail.com> | 2021-11-12 13:46:17 +0000 |
---|---|---|
committer | Eric Engestrom <eric@engestrom.ch> | 2021-11-17 20:06:22 +0000 |
commit | dc7bccdb545c60a11743fb37f4671d67d02c7031 (patch) | |
tree | efb726a037d1f719bec2ef8bb193c155817cc874 | |
parent | 565a14d7edcd8bcd379059756ca09c4081331c2e (diff) |
aco: consider pseudo-instructions reading exec in needs_exec_mask()
No matter the format, this should return true if the instruction has an
exec operand.
Otherwise, eliminate_useless_exec_writes_in_block() could remove an exec
write in a block if its successor begins with:
s2: %3737:s[8-9] = p_parallelcopy %0:exec
s2: %0:exec, s1: %3738:scc = s_wqm_b64 %3737:s[8-9]
Totals from 3 (0.00% of 150170) affected shaders (GFX10.3):
CodeSize: 23184 -> 23204 (+0.09%)
Instrs: 4143 -> 4148 (+0.12%)
Latency: 98379 -> 98382 (+0.00%)
Copies: 172 -> 175 (+1.74%)
Branches: 95 -> 97 (+2.11%)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Fixes: bc130497472 ("aco: Eliminate useless exec writes in jump threading.")
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5620
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13776>
(cherry picked from commit d89461208b79a0c5f577caf6a98dab6976c12251)
-rw-r--r-- | .pick_status.json | 2 | ||||
-rw-r--r-- | src/amd/compiler/aco_ir.cpp | 26 |
2 files changed, 14 insertions, 14 deletions
diff --git a/.pick_status.json b/.pick_status.json index bb32bf5abe8..10e43531f20 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -967,7 +967,7 @@ "description": "aco: consider pseudo-instructions reading exec in needs_exec_mask()", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "bc130497472cb4ec4ec60695ed99b169d6681118" }, diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index dcc85a92e2f..5923fea9589 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -525,12 +525,18 @@ get_reduction_identity(ReduceOp op, unsigned idx) bool needs_exec_mask(const Instruction* instr) { - if (instr->isSALU() || instr->isBranch()) + if (instr->isVALU()) { + return instr->opcode != aco_opcode::v_readlane_b32 && + instr->opcode != aco_opcode::v_readlane_b32_e64 && + instr->opcode != aco_opcode::v_writelane_b32 && + instr->opcode != aco_opcode::v_writelane_b32_e64; + } + + if (instr->isVMEM() || instr->isFlatLike()) + return true; + + if (instr->isSALU() || instr->isBranch() || instr->isSMEM() || instr->isBarrier()) return instr->reads_exec(); - if (instr->isSMEM()) - return false; - if (instr->isBarrier()) - return false; if (instr->isPseudo()) { switch (instr->opcode) { @@ -543,22 +549,16 @@ needs_exec_mask(const Instruction* instr) if (def.getTemp().type() == RegType::vgpr) return true; } - return false; + return instr->reads_exec(); case aco_opcode::p_spill: case aco_opcode::p_reload: case aco_opcode::p_logical_start: case aco_opcode::p_logical_end: - case aco_opcode::p_startpgm: return false; + case aco_opcode::p_startpgm: return instr->reads_exec(); default: break; } } - if (instr->opcode == aco_opcode::v_readlane_b32 || - instr->opcode == aco_opcode::v_readlane_b32_e64 || - instr->opcode == aco_opcode::v_writelane_b32 || - instr->opcode == aco_opcode::v_writelane_b32_e64) - return false; - return true; } |