nir: Make some notes about fsign versus NaN

This commit only documents the current behavior, even if that behavior is not the behavior preferred by the relevant specs. In SPIR-V, there are two flavors of the sign instruction, and each lives in an extended instruction set. The GLSL.std.450 FSign instruction is defined as: Result is 1.0 if x > 0, 0.0 if x = 0, or -1.0 if x < 0. This also matches the GLSL 4.60 definition. However, the OpenCL.ExtendedInstructionSet.100 sign instruction is defined as: Returns 1.0 if x > 0, -0.0 if x = -0.0, +0.0 if x = +0.0, or -1.0 if x < 0. Returns 0.0 if x is a NaN. There are two differences. Each treats -0.0 differently, and each also treats NaN differently. Specifically, GLSL.std.450 FSign does not define any specific behavior for NaN. There has been some discussion in Khronos about the NaN behavior of GLSL.std.450 FSign. As part of that discussion, I did some research into how we treat NaN for nir_op_fsign, and this commit just captures some of those notes. v2: Document the expected behavior of nir_op_fsign more thoroughly. Suggested by Rhys. Note that the current implementation of constant folding does not produce the expected result for NaN. Suggested by Caio. Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> [v1] Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6358>
author: Ian Romanick <ian.d.romanick@intel.com> 2020-02-18 10:18:57 -0800
committer: Marge Bot <eric+marge@anholt.net> 2021-01-05 02:07:09 +0000
commit: 363efc28234f6086d22270caf582ab5098d74171 (patch)
tree: 4280c0f747ef8256f330281fe07365e30dcec794
parent: 5ae7d40648ca643615fd551a3c6fcecb8301fc91 (diff)
2 files changed, 23 insertions, 0 deletions
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 8c5045e4a13..bc0c7fe2c0b 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -195,6 +195,21 @@ unop("mov", tuint, "src0")
 unop("ineg", tint, "-src0")
 unop("fneg", tfloat, "-src0")
 unop("inot", tint, "~src0") # invert every bit of the integer
+
+# nir_op_fsign roughly implements the OpenGL / Vulkan rules for sign(float).
+# The GLSL.std.450 FSign instruction is defined as:
+#
+#    Result is 1.0 if x > 0, 0.0 if x = 0, or -1.0 if x < 0.
+#
+# If the source is equal to zero, there is a preference for the result to have
+# the same sign, but this is not required (it is required by OpenCL).  If the
+# source is not a number, there is a preference for the result to be +0.0, but
+# this is not required (it is required by OpenCL).  If the source is not a
+# number, and the result is not +0.0, the result should definitely **not** be
+# NaN.
+#
+# fsign(NaN) = (False ? 0.0 : (False ? 1.0 : -1.0)) = -1.0.  This is allowed by
+# the spec, but it is not the preferred value.
 unop("fsign", tfloat, ("bit_size == 64 ? " +
                        "((src0 == 0.0) ? 0.0 : ((src0 > 0.0) ? 1.0 : -1.0)) : " +
                        "((src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 1.0f : -1.0f))"))
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index aa55e203ffd..d7577c5a2ac 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -148,6 +148,7 @@ optimizations = [
    # If a < 0: fsign(a)*a*a => -1*a*a => -a*a => abs(a)*a
    # If a > 0: fsign(a)*a*a => 1*a*a => a*a => abs(a)*a
    # If a == 0: fsign(a)*a*a => 0*0*0 => abs(0)*0
+   # If a != a: fsign(a)*a*a => 0*NaN*NaN => abs(NaN)*NaN
    (('fmul', ('fsign', a), ('fmul', a, a)), ('fmul', ('fabs', a), a)),
    (('fmul', ('fmul', ('fsign', a), a), a), ('fmul', ('fabs', a), a)),
    (('~ffma', 0.0, a, b), b),
@@ -583,6 +584,7 @@ optimizations.extend([
    # SignedZeroInfNanPreserve is set, but we don't currently have any way of
    # representing this in the optimizations other than the usual ~.
    (('~fmax', ('fmin', a,  0.0), -1.0), ('fneg', ('fsat', ('fneg', a))), '!options->lower_fsat'),
+   # fsat(fsign(NaN)) = fsat(0) = 0, and b2f(0 < NaN) = b2f(False) = 0.
    (('fsat', ('fsign', a)), ('b2f', ('flt', 0.0, a))),
    (('fsat', ('b2f', a)), ('b2f', a)),
    (('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
@@ -666,6 +668,11 @@ for s in [16, 32, 64]:
        # The (i2f32, ...) part is an open-coded fsign.  When that is combined with
        # the bcsel, it's basically copysign(1.0, a).  There is no copysign in NIR,
        # so emit an open-coded version of that.
+       #
+       # If 'a' is NaN, bcsel(False, 1.0, i2f(b2i(False) - b2i(False))) = 0, but
+       # the replacement will produce ±1.
+       #
+       # The replacement will also produce a different value for -0: -1 vs +1.
        (('bcsel@{}'.format(s), ('feq', a, 0.0), 1.0, ('i2f{}'.format(s), ('iadd', ('b2i{}'.format(s), ('flt', 0.0, 'a@{}'.format(s))), ('ineg', ('b2i{}'.format(s), ('flt', 'a@{}'.format(s), 0.0)))))),
         ('ior', fp_one, ('iand', a, 1 << (s - 1)))),
 
@@ -1564,6 +1571,7 @@ optimizations.extend([
    (('isign', a), ('imin', ('imax', a, -1), 1), 'options->lower_isign'),
    (('imin', ('imax', a, -1), 1), ('isign', a), '!options->lower_isign'),
    (('imax', ('imin', a, 1), -1), ('isign', a), '!options->lower_isign'),
+   # float(0 < NaN) - float(NaN < 0) = float(False) - float(False) = 0 - 0 = 0
    (('fsign', a), ('fsub', ('b2f', ('flt', 0.0, a)), ('b2f', ('flt', a, 0.0))), 'options->lower_fsign'),
 
    # Address/offset calculations:
author	Ian Romanick <ian.d.romanick@intel.com>	2020-02-18 10:18:57 -0800
committer	Marge Bot <eric+marge@anholt.net>	2021-01-05 02:07:09 +0000
commit	363efc28234f6086d22270caf582ab5098d74171 (patch)
tree	4280c0f747ef8256f330281fe07365e30dcec794
parent	5ae7d40648ca643615fd551a3c6fcecb8301fc91 (diff)