summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Rhys Perry <pendingchaos02@gmail.com> 2020-02-28 15:56:43 +0000
committer: Marge Bot <eric+marge@anholt.net> 2020-03-05 19:37:24 +0000
commit: 38743577f8b47c68ba01a9b9a982db52ef0f605d (patch)
tree: 44145eb765e766621f99e0afa45ab0e4d1ee880d
parent: 7f1b537304d4837c907a9299dab3a7acf2518b0b (diff)
aco: improve get_wait_states()
pipeline-db (Tahiti):
Totals from affected shaders:
SGPRS: 21208 -> 21208 (0.00 %)
VGPRS: 22388 -> 22388 (0.00 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 3278596 -> 3277004 (-0.05 %) bytes
LDS: 19 -> 19 (0.00 %) blocks
Max Waves: 238 -> 238 (0.00 %)

pipeline-db (Polaris):
Totals from affected shaders:
SGPRS: 64 -> 64 (0.00 %)
VGPRS: 96 -> 96 (0.00 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 5200 -> 5192 (-0.15 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 10 -> 10 (0.00 %)

pipeline-db (Vega):
Totals from affected shaders:
SGPRS: 0 -> 0 (0.00 %)
VGPRS: 0 -> 0 (0.00 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 0 -> 0 (0.00 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 0 -> 0 (0.00 %)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4004>
-rw-r--r-- src/amd/compiler/aco_insert_NOPs.cpp 7
1 files changed, 6 insertions, 1 deletions
diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp
index 7c6e100faf1..9c5b1c8b7c6 100644
--- a/src/amd/compiler/aco_insert_NOPs.cpp
+++ b/src/amd/compiler/aco_insert_NOPs.cpp
@@ -179,7 +179,12 @@ struct NOP_ctx_gfx10 {
int get_wait_states(aco_ptr<Instruction>& instr)
{
- return 1;
+ if (instr->opcode == aco_opcode::s_nop)
+ return static_cast<SOPP_instruction*>(instr.get())->imm + 1;
+ else if (instr->opcode == aco_opcode::p_constaddr)
+ return 3; /* lowered to 3 instructions in the assembler */
+ else
+ return 1;
}
bool regs_intersect(PhysReg a_reg, unsigned a_size, PhysReg b_reg, unsigned b_size)