diff options
author | Daniel Schürmann <daniel@schuermann.dev> | 2020-09-11 15:54:39 +0100 |
---|---|---|
committer | Marge Bot <eric+marge@anholt.net> | 2021-01-13 17:46:56 +0000 |
commit | e3790fc4587485b75a389a5a640846c8b5ffa33f (patch) | |
tree | f1b620f06ad1a75dea64a9235ad32373be6bbb34 /src/amd | |
parent | a9fd9187e830b6665984f2f9cf651465c266dc85 (diff) |
aco: optimize packed clamp
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6680>
Diffstat (limited to 'src/amd')
-rw-r--r-- | src/amd/compiler/aco_optimizer.cpp | 23 |
1 files changed, 21 insertions, 2 deletions
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index d4b95f50538..768037a5294 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -2728,7 +2728,27 @@ bool combine_add_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr) void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr) { - // TODO: clamp, fneg? + VOP3P_instruction* vop3p = static_cast<VOP3P_instruction*>(instr.get()); + + /* apply clamp */ + if (instr->opcode == aco_opcode::v_pk_mul_f16 && + instr->operands[1].constantEquals(0x3C00) && + vop3p->clamp && + vop3p->opsel_lo == 0x0 && + vop3p->opsel_hi == 0x1 && + instr->operands[0].isTemp() && + ctx.uses[instr->operands[0].tempId()] == 1) { + + ssa_info& info = ctx.info[instr->operands[0].tempId()]; + if (info.is_vop3p() && instr_info.can_use_output_modifiers[(int)info.instr->opcode]) { + Instruction* candidate = ctx.info[instr->operands[0].tempId()].instr; + static_cast<VOP3P_instruction*>(candidate)->clamp = true; + std::swap(instr->definitions[0], candidate->definitions[0]); + ctx.info[candidate->definitions[0].tempId()].instr = candidate; + ctx.uses[instr->definitions[0].tempId()]--; + return; + } + } if (instr->opcode == aco_opcode::v_pk_add_f16) { if (instr->definitions[0].isPrecise()) @@ -2781,7 +2801,6 @@ void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr) assert(mul_instr->format == Format::VOP3P); aco_ptr<VOP3P_instruction> fma{create_instruction<VOP3P_instruction>(aco_opcode::v_pk_fma_f16, Format::VOP3P, 3, 1)}; VOP3P_instruction* mul = static_cast<VOP3P_instruction*>(mul_instr); - VOP3P_instruction* vop3p = static_cast<VOP3P_instruction*>(instr.get()); for (unsigned i = 0; i < 2; i++) { fma->operands[i] = op[i]; fma->neg_lo[i] = mul->neg_lo[i]; |