summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- .pick_status.json 2
-rw-r--r-- src/amd/compiler/aco_optimizer.cpp 2
-rw-r--r-- src/amd/compiler/tests/test_optimizer.cpp 28
3 files changed, 30 insertions, 2 deletions
diff --git a/.pick_status.json b/.pick_status.json
index 873383191f5..21e9ef2ba1a 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -8167,7 +8167,7 @@
"description": "aco: disallow various v_add_u32 opts if modifiers are used",
"nominated": true,
"nomination_type": 0,
- "resolution": 0,
+ "resolution": 3,
"master_sha": null,
"because_sha": null
},
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 1eeef1f5bb9..fba46f6d5ac 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -2783,7 +2783,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
else combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xor3_b32, "012", 1 | 2);
} else if (instr->opcode == aco_opcode::v_add_u32) {
if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_addc_co_u32, 1 | 2)) ;
- else if (ctx.program->chip_class >= GFX9) {
+ else if (ctx.program->chip_class >= GFX9 && !instr->usesModifiers()) {
if (combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xad_u32, "120", 1 | 2)) ;
else if (combine_three_valu_op(ctx, instr, aco_opcode::v_xor_b32, aco_opcode::v_xad_u32, "120", 1 | 2)) ;
else if (combine_three_valu_op(ctx, instr, aco_opcode::s_add_i32, aco_opcode::v_add3_u32, "012", 1 | 2)) ;
diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp
index d76cc37f8b1..ad704a5134d 100644
--- a/src/amd/compiler/tests/test_optimizer.cpp
+++ b/src/amd/compiler/tests/test_optimizer.cpp
@@ -181,3 +181,31 @@ BEGIN_TEST(optimize.const_comparison_ordering)
finish_opt_test();
END_TEST
+
+BEGIN_TEST(optimize.add3)
+ //>> v1: %a, v1: %b, v1: %c, s2: %_:exec = p_startpgm
+ if (!setup_cs("v1 v1 v1", GFX9))
+ return;
+ /* Checks when chained v_add_u32 may fuse to v_add3_u32. Case 0: no modifiers, fusion expected. */
+ //! v1: %res0 = v_add3_u32 %a, %b, %c
+ //! p_unit_test 0, %res0
+ Builder::Result tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
+ writeout(0, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp));
+ /* Case 1: clamp is set on the inner add; both v_add_u32 are expected to remain separate. */
+ //! v1: %tmp1 = v_add_u32 %b, %c clamp
+ //! v1: %res1 = v_add_u32 %a, %tmp1
+ //! p_unit_test 1, %res1
+ tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
+ static_cast<VOP3A_instruction *>(tmp.instr)->clamp = true; /* mark the inner add as clamped */
+ writeout(1, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp));
+ /* Case 2: clamp is set on the outer add; both v_add_u32 are expected to remain separate. */
+ //! v1: %tmp2 = v_add_u32 %b, %c
+ //! v1: %res2 = v_add_u32 %a, %tmp2 clamp
+ //! p_unit_test 2, %res2
+ tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
+ tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp);
+ static_cast<VOP3A_instruction *>(tmp.instr)->clamp = true; /* mark the outer add as clamped */
+ writeout(2, tmp);
+
+ finish_opt_test();
+END_TEST