diff options
author | Daniel Schürmann <daniel@schuermann.dev> | 2020-11-26 17:36:47 +0100 |
---|---|---|
committer | Dylan Baker <dylan.c.baker@intel.com> | 2021-01-13 11:24:11 -0800 |
commit | 3aaf5188fe0741776bb8218aee9c7507518cb0b8 (patch) | |
tree | 34683d456ed9d75ba44562d5e3d0552dc71fe57c | |
parent | 0d63d9463e8c86c6b2c6709283e6af1e865275df (diff) |
aco: remove divergent branches which only jump over very few instructions
Totals from 18436 (13.23% of 139391) affected shaders (NAVI10):
CodeSize: 138428504 -> 138172588 (-0.18%)
Instrs: 26605127 -> 26541176 (-0.24%)
Cycles: 1624994088 -> 1622461620 (-0.16%)
VMEM: 3689892 -> 3689102 (-0.02%)
SMEM: 1131767 -> 1131761 (-0.00%)
Branches: 851796 -> 787852 (-7.51%)
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7814>
(cherry picked from commit 288032a87316f8542d1d5de8b8e1d3a20359ceab)
-rw-r--r-- | .pick_status.json | 2 | ||||
-rw-r--r-- | src/amd/compiler/aco_lower_to_hw_instr.cpp | 56 |
2 files changed, 44 insertions, 14 deletions
diff --git a/.pick_status.json b/.pick_status.json index ee6bc6c3f46..eae4a01e3e8 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -274,7 +274,7 @@ "description": "aco: remove divergent branches which only jump over very few instructions", "nominated": false, "nomination_type": null, - "resolution": 4, + "resolution": 1, "master_sha": null, "because_sha": null }, diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index a5b939e905b..a4e2d5b6404 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -1792,7 +1792,7 @@ void lower_to_hw_instr(Program* program) { Block *discard_block = NULL; - for (size_t block_idx = 0; block_idx < program->blocks.size(); block_idx++) + for (int block_idx = program->blocks.size() - 1; block_idx >= 0; block_idx--) { Block *block = &program->blocks[block_idx]; lower_context ctx; @@ -1980,40 +1980,70 @@ void lower_to_hw_instr(Program* program) } } else if (instr->format == Format::PSEUDO_BRANCH) { Pseudo_branch_instruction* branch = static_cast<Pseudo_branch_instruction*>(instr.get()); + uint32_t target = branch->target[0]; + /* check if all blocks from current to target are empty */ - bool can_remove = block->index < branch->target[0]; + /* In case there are <= 4 SALU or <= 2 VALU instructions, remove the branch */ + bool can_remove = block->index < target; + unsigned num_scalar = 0; + unsigned num_vector = 0; for (unsigned i = block->index + 1; can_remove && i < branch->target[0]; i++) { - if (program->blocks[i].instructions.size()) + /* uniform branches must not be ignored if they + * are about to jump over actual instructions */ + if (!program->blocks[i].instructions.empty() && + (branch->opcode != aco_opcode::p_cbranch_z || + branch->operands[0].physReg() != exec)) { can_remove = false; + break; + } + + for (aco_ptr<Instruction>& inst : program->blocks[i].instructions) { + if (inst->format == Format::SOPP) { + can_remove = false; + } else if (inst->isSALU()) { + num_scalar++; + } else if (inst->isVALU()) { + num_vector++; + } else { + can_remove = false; + } + + if (num_scalar + num_vector * 2 > 4) + can_remove = false; + + if (!can_remove) + break; + } } + if (can_remove) continue; switch (instr->opcode) { case aco_opcode::p_branch: - assert(block->linear_succs[0] == branch->target[0]); - bld.sopp(aco_opcode::s_branch, branch->definitions[0], branch->target[0]); + assert(block->linear_succs[0] == target); + bld.sopp(aco_opcode::s_branch, branch->definitions[0], target); break; case aco_opcode::p_cbranch_nz: - assert(block->linear_succs[1] == branch->target[0]); + assert(block->linear_succs[1] == target); if (branch->operands[0].physReg() == exec) - bld.sopp(aco_opcode::s_cbranch_execnz, branch->definitions[0], branch->target[0]); + bld.sopp(aco_opcode::s_cbranch_execnz, branch->definitions[0], target); else if (branch->operands[0].physReg() == vcc) - bld.sopp(aco_opcode::s_cbranch_vccnz, branch->definitions[0], branch->target[0]); + bld.sopp(aco_opcode::s_cbranch_vccnz, branch->definitions[0], target); else { assert(branch->operands[0].physReg() == scc); - bld.sopp(aco_opcode::s_cbranch_scc1, branch->definitions[0], branch->target[0]); + bld.sopp(aco_opcode::s_cbranch_scc1, branch->definitions[0], target); } break; case aco_opcode::p_cbranch_z: - assert(block->linear_succs[1] == branch->target[0]); + assert(block->linear_succs[1] == target); if (branch->operands[0].physReg() == exec) - bld.sopp(aco_opcode::s_cbranch_execz, branch->definitions[0], branch->target[0]); + bld.sopp(aco_opcode::s_cbranch_execz, branch->definitions[0], target); else if (branch->operands[0].physReg() == vcc) - bld.sopp(aco_opcode::s_cbranch_vccz, branch->definitions[0], branch->target[0]); + bld.sopp(aco_opcode::s_cbranch_vccz, branch->definitions[0], target); else { assert(branch->operands[0].physReg() == scc); - bld.sopp(aco_opcode::s_cbranch_scc0, branch->definitions[0], branch->target[0]); + bld.sopp(aco_opcode::s_cbranch_scc0, branch->definitions[0], target); } break; default: |