diff options
-rw-r--r-- | .pick_status.json | 2 | ||||
-rw-r--r-- | src/amd/compiler/aco_optimizer.cpp | 2 | ||||
-rw-r--r-- | src/amd/compiler/tests/test_optimizer.cpp | 28 |
3 files changed, 30 insertions, 2 deletions
diff --git a/.pick_status.json b/.pick_status.json index 873383191f5..21e9ef2ba1a 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -8167,7 +8167,7 @@ "description": "aco: disallow various v_add_u32 opts if modifiers are used", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 3, "master_sha": null, "because_sha": null }, diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 1eeef1f5bb9..fba46f6d5ac 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -2783,7 +2783,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr else combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xor3_b32, "012", 1 | 2); } else if (instr->opcode == aco_opcode::v_add_u32) { if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_addc_co_u32, 1 | 2)) ; - else if (ctx.program->chip_class >= GFX9) { + else if (ctx.program->chip_class >= GFX9 && !instr->usesModifiers()) { if (combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xad_u32, "120", 1 | 2)) ; else if (combine_three_valu_op(ctx, instr, aco_opcode::v_xor_b32, aco_opcode::v_xad_u32, "120", 1 | 2)) ; else if (combine_three_valu_op(ctx, instr, aco_opcode::s_add_i32, aco_opcode::v_add3_u32, "012", 1 | 2)) ; diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp index d76cc37f8b1..ad704a5134d 100644 --- a/src/amd/compiler/tests/test_optimizer.cpp +++ b/src/amd/compiler/tests/test_optimizer.cpp @@ -181,3 +181,31 @@ BEGIN_TEST(optimize.const_comparison_ordering) finish_opt_test(); END_TEST + +BEGIN_TEST(optimize.add3) + //>> v1: %a, v1: %b, v1: %c, s2: %_:exec = p_startpgm + if (!setup_cs("v1 v1 v1", GFX9)) + return; + + //! v1: %res0 = v_add3_u32 %a, %b, %c + //! p_unit_test 0, %res0 + Builder::Result tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]); + writeout(0, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp)); + + //! v1: %tmp1 = v_add_u32 %b, %c clamp + //! v1: %res1 = v_add_u32 %a, %tmp1 + //! p_unit_test 1, %res1 + tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]); + static_cast<VOP3A_instruction *>(tmp.instr)->clamp = true; + writeout(1, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp)); + + //! v1: %tmp2 = v_add_u32 %b, %c + //! v1: %res2 = v_add_u32 %a, %tmp2 clamp + //! p_unit_test 2, %res2 + tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]); + tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp); + static_cast<VOP3A_instruction *>(tmp.instr)->clamp = true; + writeout(2, tmp); + + finish_opt_test(); +END_TEST |