From 944bdf259cc622b708306789b64e0001bf5aaf5b Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Wed, 7 Oct 2020 11:45:30 +0100 Subject: aco: disallow various v_add_u32 opts if modifiers are used Check for clamp, SDWA or DPP. The optimization isn't possible with SDWA and DPP, so it would have been skipped anyway. Doing any of these with a clamp modifier present would be incorrect. No fossil-db changes. Signed-off-by: Rhys Perry Reviewed-by: Samuel Pitoiset Cc: mesa-stable Part-of: --- .pick_status.json | 2 +- src/amd/compiler/aco_optimizer.cpp | 2 +- src/amd/compiler/tests/test_optimizer.cpp | 28 ++++++++++++++++++++++++++++ 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 873383191f5..21e9ef2ba1a 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -8167,7 +8167,7 @@ "description": "aco: disallow various v_add_u32 opts if modifiers are used", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 3, "master_sha": null, "because_sha": null }, diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 1eeef1f5bb9..fba46f6d5ac 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -2783,7 +2783,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr else combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xor3_b32, "012", 1 | 2); } else if (instr->opcode == aco_opcode::v_add_u32) { if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_addc_co_u32, 1 | 2)) ; - else if (ctx.program->chip_class >= GFX9) { + else if (ctx.program->chip_class >= GFX9 && !instr->usesModifiers()) { if (combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xad_u32, "120", 1 | 2)) ; else if (combine_three_valu_op(ctx, instr, aco_opcode::v_xor_b32, aco_opcode::v_xad_u32, "120", 1 | 2)) ; else if (combine_three_valu_op(ctx, instr, aco_opcode::s_add_i32, aco_opcode::v_add3_u32, "012", 1 | 2)) ; diff 
--git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp index d76cc37f8b1..ad704a5134d 100644 --- a/src/amd/compiler/tests/test_optimizer.cpp +++ b/src/amd/compiler/tests/test_optimizer.cpp @@ -181,3 +181,31 @@ BEGIN_TEST(optimize.const_comparison_ordering) finish_opt_test(); END_TEST + +BEGIN_TEST(optimize.add3) + //>> v1: %a, v1: %b, v1: %c, s2: %_:exec = p_startpgm + if (!setup_cs("v1 v1 v1", GFX9)) + return; + + //! v1: %res0 = v_add3_u32 %a, %b, %c + //! p_unit_test 0, %res0 + Builder::Result tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]); + writeout(0, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp)); + + //! v1: %tmp1 = v_add_u32 %b, %c clamp + //! v1: %res1 = v_add_u32 %a, %tmp1 + //! p_unit_test 1, %res1 + tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]); + static_cast<VOP3A_instruction *>(tmp.instr)->clamp = true; + writeout(1, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp)); + + //! v1: %tmp2 = v_add_u32 %b, %c + //! v1: %res2 = v_add_u32 %a, %tmp2 clamp + //! p_unit_test 2, %res2 + tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]); + tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp); + static_cast<VOP3A_instruction *>(tmp.instr)->clamp = true; + writeout(2, tmp); + + finish_opt_test(); +END_TEST -- cgit v1.2.3