summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Samuel Pitoiset <samuel.pitoiset@gmail.com> 2020-11-17 17:14:49 +0100
committer: Dylan Baker <dylan.c.baker@intel.com> 2020-12-14 09:42:43 -0800
commit: 4c5d644bdb75ce69e69aa8a742860e2d8835e7f2 (patch)
tree: c11670e641dcc6cc6465ba32bf276c964075037b
parent: 60e001e95ff9acb6871c4b66ab6eb08eb82936ad (diff)
aco: fix combining max(-min(a, b), c) if a or b uses the neg modifier
No fossils-db changes.

Cc: 20.2, 20.3
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7657>
(cherry picked from commit 0fcd379184d658285f3313c5c4026253e0ec6930)
-rw-r--r-- src/amd/compiler/aco_optimizer.cpp | 8
-rw-r--r-- src/amd/compiler/tests/test_optimizer.cpp | 23
2 files changed, 27 insertions, 4 deletions
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 8aef3809cec..9bb1ad698c7 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -2023,8 +2023,8 @@ bool combine_minmax(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode opposi
uint64_t omod_clamp = ctx.info[instr->definitions[0].tempId()].label &
(label_omod_success | label_clamp_success);
- /* min(-max(a, b), c) -> min3(-a, -b, c) *
- * max(-min(a, b), c) -> max3(-a, -b, c) */
+ /* min(-max(a, b), c) -> min3(c, -a, -b) *
+ * max(-min(a, b), c) -> max3(c, -a, -b) */
for (unsigned swap = 0; swap < 2; swap++) {
Operand operands[3];
bool neg[3], abs[3], clamp, precise;
@@ -2036,8 +2036,8 @@ bool combine_minmax(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode opposi
&clamp, &omod, &inbetween_neg, NULL, NULL, &precise) &&
inbetween_neg) {
ctx.uses[instr->operands[swap].tempId()]--;
- neg[1] = true;
- neg[2] = true;
+ neg[1] = !neg[1];
+ neg[2] = !neg[2];
create_vop3_for_op3(ctx, minmax3, instr, operands, neg, abs, opsel, clamp, omod);
if (omod_clamp & label_omod_success)
ctx.info[instr->definitions[0].tempId()].set_omod_success(instr.get());
diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp
index ad704a5134d..2c856e8775e 100644
--- a/src/amd/compiler/tests/test_optimizer.cpp
+++ b/src/amd/compiler/tests/test_optimizer.cpp
@@ -209,3 +209,26 @@ BEGIN_TEST(optimize.add3)
finish_opt_test();
END_TEST
+
+BEGIN_TEST(optimize.minmax)
+ for (unsigned i = GFX8; i <= GFX10; i++) {
+ //>> v1: %a, s2: %_:exec = p_startpgm
+ if (!setup_cs("v1", (chip_class)i))
+ continue;
+
+ //! v1: %res0 = v_max3_f32 0, -0, %a
+ //! p_unit_test 0, %res0
+ Temp xor0 = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), Operand(0x80000000u), Operand(inputs[0]));
+ Temp min = bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0u), xor0);
+ Temp xor1 = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), Operand(0x80000000u), min);
+ writeout(0, bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u), xor1));
+
+ //! v1: %res1 = v_max3_f32 0, -0, -%a
+ //! p_unit_test 1, %res1
+ min = bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0u), Operand(inputs[0]));
+ xor1 = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), Operand(0x80000000u), min);
+ writeout(1, bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u), xor1));
+
+ finish_opt_test();
+ }
+END_TEST