diff options
author | Rhys Perry <pendingchaos02@gmail.com> | 2020-10-07 11:45:30 +0100 |
---|---|---|
committer | Dylan Baker <dylan.c.baker@intel.com> | 2020-12-03 10:45:10 -0800 |
commit | 944bdf259cc622b708306789b64e0001bf5aaf5b (patch) | |
tree | fc646f6649164ac36e1e07d70a5e0fd481a60492 | |
parent | 7ea1f6ff78e1e8a2f090e67cdfea7cfe2a09a947 (diff) |
aco: disallow various v_add_u32 opts if modifiers are used
Check for clamp, SDWA or DPP modifiers. The optimizations aren't possible
with SDWA or DPP, so they would have been skipped anyway. Applying any of
them when a clamp modifier is present would be incorrect.
No fossil-db changes.
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7045>
-rw-r--r-- | .pick_status.json | 2 |
-rw-r--r-- | src/amd/compiler/aco_optimizer.cpp | 2 |
-rw-r--r-- | src/amd/compiler/tests/test_optimizer.cpp | 28 |
3 files changed, 30 insertions, 2 deletions
diff --git a/.pick_status.json b/.pick_status.json index 873383191f5..21e9ef2ba1a 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -8167,7 +8167,7 @@ "description": "aco: disallow various v_add_u32 opts if modifiers are used", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 3, "master_sha": null, "because_sha": null }, diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 1eeef1f5bb9..fba46f6d5ac 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -2783,7 +2783,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr else combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xor3_b32, "012", 1 | 2); } else if (instr->opcode == aco_opcode::v_add_u32) { if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_addc_co_u32, 1 | 2)) ; - else if (ctx.program->chip_class >= GFX9) { + else if (ctx.program->chip_class >= GFX9 && !instr->usesModifiers()) { if (combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xad_u32, "120", 1 | 2)) ; else if (combine_three_valu_op(ctx, instr, aco_opcode::v_xor_b32, aco_opcode::v_xad_u32, "120", 1 | 2)) ; else if (combine_three_valu_op(ctx, instr, aco_opcode::s_add_i32, aco_opcode::v_add3_u32, "012", 1 | 2)) ; diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp index d76cc37f8b1..ad704a5134d 100644 --- a/src/amd/compiler/tests/test_optimizer.cpp +++ b/src/amd/compiler/tests/test_optimizer.cpp @@ -181,3 +181,31 @@ BEGIN_TEST(optimize.const_comparison_ordering) finish_opt_test(); END_TEST + +BEGIN_TEST(optimize.add3) + //>> v1: %a, v1: %b, v1: %c, s2: %_:exec = p_startpgm + if (!setup_cs("v1 v1 v1", GFX9)) + return; + + //! v1: %res0 = v_add3_u32 %a, %b, %c + //! 
p_unit_test 0, %res0 + Builder::Result tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]); + writeout(0, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp)); + + //! v1: %tmp1 = v_add_u32 %b, %c clamp + //! v1: %res1 = v_add_u32 %a, %tmp1 + //! p_unit_test 1, %res1 + tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]); + static_cast<VOP3A_instruction *>(tmp.instr)->clamp = true; + writeout(1, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp)); + + //! v1: %tmp2 = v_add_u32 %b, %c + //! v1: %res2 = v_add_u32 %a, %tmp2 clamp + //! p_unit_test 2, %res2 + tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]); + tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp); + static_cast<VOP3A_instruction *>(tmp.instr)->clamp = true; + writeout(2, tmp); + + finish_opt_test(); +END_TEST |