summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Rhys Perry <pendingchaos02@gmail.com> 2020-11-13 15:10:58 +0000
committer: Dylan Baker <dylan.c.baker@intel.com> 2020-11-17 10:57:32 -0800
commit: dc0580d3441152336134d4840a7b70389bd67cd5 (patch)
tree: 2bad9963b41025233f398115828d1426e9276636
parent: ded5cd528a9567fce68909a60c19dbd6b65c1211 (diff)
aco: disable omod if the sign of zeros should be preserved
The RDNA ISA doc says in section 6.2.2 that omod doesn't preserve -0.0.
LLVM appears to always disable omod in this situation, but clamp is
unaffected.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Fixes: df645fa369d ("aco: implement VK_KHR_shader_float_controls")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7605>
(cherry picked from commit 558daa73f9adb1275ddcf00515c7f79f726b7ae1)
-rw-r--r--  .pick_status.json                    2
-rw-r--r--  src/amd/compiler/aco_optimizer.cpp  11
2 files changed, 9 insertions, 4 deletions
diff --git a/.pick_status.json b/.pick_status.json
index fe678552342..6f40c83f18a 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -706,7 +706,7 @@
"description": "aco: disable omod if the sign of zeros should be preserved",
"nominated": true,
"nomination_type": 1,
- "resolution": 0,
+ "resolution": 1,
"master_sha": null,
"because_sha": "df645fa369d12be4d5e0fd9e4f6d4455caf2f4c3"
},
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 3f2873a4d49..97eec28dee5 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -2548,9 +2548,14 @@ bool apply_omod_clamp(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
if (!instr->isSDWA() && !can_vop3)
return false;
- /* omod has no effect if denormals are enabled */
- bool can_use_omod = (instr->definitions[0].bytes() == 4 ? block.fp_mode.denorm32 : block.fp_mode.denorm16_64) == 0;
- can_use_omod = can_use_omod && (can_vop3 || ctx.program->chip_class >= GFX9); /* SDWA omod is GFX9+ */
+ /* omod flushes -0 to +0 and has no effect if denormals are enabled */
+ bool can_use_omod = (can_vop3 || ctx.program->chip_class >= GFX9); /* SDWA omod is GFX9+ */
+ if (instr->definitions[0].bytes() == 4)
+ can_use_omod = can_use_omod && block.fp_mode.denorm32 == 0 &&
+ !block.fp_mode.preserve_signed_zero_inf_nan32;
+ else
+ can_use_omod = can_use_omod && block.fp_mode.denorm16_64 == 0 &&
+ !block.fp_mode.preserve_signed_zero_inf_nan16_64;
ssa_info& def_info = ctx.info[instr->definitions[0].tempId()];