diff options
author | Rhys Perry <pendingchaos02@gmail.com> | 2021-10-01 11:27:00 +0100 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2022-01-20 22:54:42 +0000 |
commit | f2fbba7920934dc94c907c069f73ad6513d91432 (patch) | |
tree | d7e441d4e94ed750454f67a098ccb95df5d57717 /src/compiler/nir | |
parent | 312a2849807f577f0bbdf10b4e5e5fd1c3546b02 (diff) |
nir/algebraic: optimize open-coded fmulz/ffmaz
This pattern will be found in future versions of D3D9 DXVK.
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13436>
Diffstat (limited to 'src/compiler/nir')
-rw-r--r-- | src/compiler/nir/nir_opt_algebraic.py | 13 |
1 files changed, 13 insertions, 0 deletions
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 147e0dd345d..075fc1b485e 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -238,6 +238,19 @@ optimizations = [ (('sudot_4x8_iadd_sat', '#a', '#b', 'c(is_not_const)'), ('iadd_sat', ('sudot_4x8_iadd', a, b, 0), c), '!options->lower_iadd_sat'), (('sdot_2x16_iadd_sat', '#a', '#b', 'c(is_not_const)'), ('iadd_sat', ('sdot_2x16_iadd', a, b, 0), c), '!options->lower_iadd_sat'), (('udot_2x16_uadd_sat', '#a', '#b', 'c(is_not_const)'), ('uadd_sat', ('udot_2x16_uadd', a, b, 0), c), '!options->lower_uadd_sat'), + + # Optimize open-coded fmulz. + # (b==0.0 ? 0.0 : a) * (a==0.0 ? 0.0 : b) -> fmulz(a, b) + (('fmul@32', ('bcsel', ignore_exact('feq', b, 0.0), 0.0, a), ('bcsel', ignore_exact('feq', a, 0.0), 0.0, b)), + ('fmulz', a, b), 'options->has_fmulz && !'+signed_zero_inf_nan_preserve_32), + (('fmul@32', a, ('bcsel', ignore_exact('feq', a, 0.0), 0.0, '#b(is_not_const_zero)')), + ('fmulz', a, b), 'options->has_fmulz && !'+signed_zero_inf_nan_preserve_32), + + # ffma(b==0.0 ? 0.0 : a, a==0.0 ? 0.0 : b, c) -> ffmaz(a, b, c) + (('ffma@32', ('bcsel', ignore_exact('feq', b, 0.0), 0.0, a), ('bcsel', ignore_exact('feq', a, 0.0), 0.0, b), c), + ('ffmaz', a, b, c), 'options->has_fmulz && !'+signed_zero_inf_nan_preserve_32), + (('ffma@32', a, ('bcsel', ignore_exact('feq', a, 0.0), 0.0, '#b(is_not_const_zero)'), c), + ('ffmaz', a, b, c), 'options->has_fmulz && !'+signed_zero_inf_nan_preserve_32), ] # Shorthand for the expansion of just the dot product part of the [iu]dp4a |