summaryrefslogtreecommitdiff
path: root/src/compiler/nir
diff options
context:
space:
mode:
authorRhys Perry <pendingchaos02@gmail.com>2021-09-14 18:02:01 +0100
committerMarge Bot <emma+marge@anholt.net>2022-01-20 22:54:42 +0000
commit495debebad2a86b12c27da173a545ca44b0f5cff (patch)
treea36a09e52f30c68dbd6418ca51e96b6120f642bc /src/compiler/nir
parent14b82270837c331adc30bf2a747b3cb720917d67 (diff)
nir/algebraic: optimize expressions using fmulz/ffmaz
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13436>
Diffstat (limited to 'src/compiler/nir')
-rw-r--r--src/compiler/nir/nir_opt_algebraic.py48
-rw-r--r--src/compiler/nir/nir_range_analysis.c26
-rw-r--r--src/compiler/nir/nir_search_helpers.h15
3 files changed, 75 insertions, 14 deletions
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 075fc1b485e..8c6cd77ffb9 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -146,10 +146,15 @@ optimizations = [
(('usadd_4x8_vc4', a, 0), a),
(('usadd_4x8_vc4', a, ~0), ~0),
(('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
+ (('~fadd', ('fmulz', a, b), ('fmulz', a, c)), ('fmulz', a, ('fadd', b, c))),
(('~ffma', a, b, ('ffma(is_used_once)', a, c, d)), ('ffma', a, ('fadd', b, c), d)),
(('~ffma', a, b, ('fmul(is_used_once)', a, c)), ('fmul', a, ('fadd', b, c))),
(('~fadd', ('fmul(is_used_once)', a, b), ('ffma(is_used_once)', a, c, d)), ('ffma', a, ('fadd', b, c), d)),
(('~ffma', a, ('fmul(is_used_once)', b, c), ('fmul(is_used_once)', b, d)), ('fmul', b, ('ffma', a, c, d))),
+ (('~ffmaz', a, b, ('ffmaz(is_used_once)', a, c, d)), ('ffmaz', a, ('fadd', b, c), d)),
+ (('~ffmaz', a, b, ('fmulz(is_used_once)', a, c)), ('fmulz', a, ('fadd', b, c))),
+ (('~fadd', ('fmulz(is_used_once)', a, b), ('ffmaz(is_used_once)', a, c, d)), ('ffmaz', a, ('fadd', b, c), d)),
+ (('~ffmaz', a, ('fmulz(is_used_once)', b, c), ('fmulz(is_used_once)', b, d)), ('fmulz', b, ('ffmaz', a, c, d))),
(('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
(('iand', ('ior', a, b), ('ior', a, c)), ('ior', a, ('iand', b, c))),
(('ior', ('iand', a, b), ('iand', a, c)), ('iand', a, ('ior', b, c))),
@@ -164,10 +169,18 @@ optimizations = [
# The only effect a*0.0 should have is when 'a' is infinity, -0.0 or NaN
(('fmul', 'a@16', 0.0), 0.0, '!'+signed_zero_inf_nan_preserve_16),
(('fmul', 'a@32', 0.0), 0.0, '!'+signed_zero_inf_nan_preserve_32),
+ (('fmulz', a, 0.0), 0.0),
+ (('fmulz', a, 'b(is_finite_not_zero)'), ('fmul', a, b), '!'+signed_zero_inf_nan_preserve_32),
+ (('fmulz', 'a(is_finite)', 'b(is_finite)'), ('fmul', a, b)),
+ (('fmulz', a, a), ('fmul', a, a)),
+ (('ffmaz', a, 'b(is_finite_not_zero)', c), ('ffma', a, b, c), '!'+signed_zero_inf_nan_preserve_32),
+ (('ffmaz', 'a(is_finite)', 'b(is_finite)', c), ('ffma', a, b, c)),
+ (('ffmaz', a, a, b), ('ffma', a, a, b)),
(('imul', a, 0), 0),
(('umul_unorm_4x8_vc4', a, 0), 0),
(('umul_unorm_4x8_vc4', a, ~0), a),
(('~fmul', a, 1.0), a),
+ (('~fmulz', a, 1.0), a),
# The only effect a*1.0 can have is flushing denormals. If it's only used by
# a floating point instruction, they should flush any input denormals and
# this multiplication isn't needed.
@@ -184,12 +197,17 @@ optimizations = [
(('~ffma', 0.0, a, b), b),
(('ffma@16(is_only_used_as_float)', 0.0, a, b), b, '!'+signed_zero_inf_nan_preserve_16),
(('ffma@32(is_only_used_as_float)', 0.0, a, b), b, '!'+signed_zero_inf_nan_preserve_32),
+ (('ffmaz', 0.0, a, b), ('fadd', 0.0, b)),
(('~ffma', a, b, 0.0), ('fmul', a, b)),
(('ffma@16', a, b, 0.0), ('fmul', a, b), '!'+signed_zero_inf_nan_preserve_16),
(('ffma@32', a, b, 0.0), ('fmul', a, b), '!'+signed_zero_inf_nan_preserve_32),
+ (('ffmaz', a, b, 0.0), ('fmulz', a, b), '!'+signed_zero_inf_nan_preserve_32),
(('ffma', 1.0, a, b), ('fadd', a, b)),
+ (('ffmaz', 1.0, a, b), ('fadd', a, b), '!'+signed_zero_inf_nan_preserve_32),
(('ffma', -1.0, a, b), ('fadd', ('fneg', a), b)),
+ (('ffmaz', -1.0, a, b), ('fadd', ('fneg', a), b), '!'+signed_zero_inf_nan_preserve_32),
(('~ffma', '#a', '#b', c), ('fadd', ('fmul', a, b), c)),
+ (('~ffmaz', '#a', '#b', c), ('fadd', ('fmulz', a, b), c)),
(('~flrp', a, b, 0.0), a),
(('~flrp', a, b, 1.0), b),
(('~flrp', a, a, b), a),
@@ -769,6 +787,7 @@ optimizations.extend([
(('fsat', ('fsat', a)), ('fsat', a)),
(('fsat', ('fneg(is_used_once)', ('fadd(is_used_once)', a, b))), ('fsat', ('fadd', ('fneg', a), ('fneg', b))), '!options->lower_fsat'),
(('fsat', ('fneg(is_used_once)', ('fmul(is_used_once)', a, b))), ('fsat', ('fmul', ('fneg', a), b)), '!options->lower_fsat'),
+ (('fsat', ('fneg(is_used_once)', ('fmulz(is_used_once)', a, b))), ('fsat', ('fmulz', ('fneg', a), b)), '!options->lower_fsat && !'+signed_zero_inf_nan_preserve_32),
(('fsat', ('fabs(is_used_once)', ('fmul(is_used_once)', a, b))), ('fsat', ('fmul', ('fabs', a), ('fabs', b))), '!options->lower_fsat'),
(('fmin', ('fmax', ('fmin', ('fmax', a, b), c), b), c), ('fmin', ('fmax', a, b), c)),
(('imin', ('imax', ('imin', ('imax', a, b), c), b), c), ('imin', ('imax', a, b), c)),
@@ -1228,6 +1247,7 @@ optimizations.extend([
(('~fmul', ('fexp2(is_used_once)', a), ('fexp2(is_used_once)', b)), ('fexp2', ('fadd', a, b))),
(('bcsel', ('flt', a, 0.0), 0.0, ('fsqrt', a)), ('fsqrt', ('fmax', a, 0.0))),
(('~fmul', ('fsqrt', a), ('fsqrt', a)), ('fabs',a)),
+ (('~fmulz', ('fsqrt', a), ('fsqrt', a)), ('fabs', a)),
# Division and reciprocal
(('~fdiv', 1.0, a), ('frcp', a)),
(('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
@@ -1546,29 +1566,42 @@ optimizations.extend([
# Propagate negation up multiplication chains
(('fmul(is_used_by_non_fsat)', ('fneg', a), b), ('fneg', ('fmul', a, b))),
+ (('fmulz(is_used_by_non_fsat)', ('fneg', a), b), ('fneg', ('fmulz', a, b)), '!'+signed_zero_inf_nan_preserve_32),
(('ffma', ('fneg', a), ('fneg', b), c), ('ffma', a, b, c)),
+ (('ffmaz', ('fneg', a), ('fneg', b), c), ('ffmaz', a, b, c)),
(('imul', ('ineg', a), b), ('ineg', ('imul', a, b))),
# Propagate constants up multiplication chains
(('~fmul(is_used_once)', ('fmul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fmul', ('fmul', a, c), b)),
+ (('~fmulz(is_used_once)', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fmulz', ('fmulz', a, c), b)),
+ (('~fmul(is_used_once)', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_finite_not_zero)'), ('fmulz', ('fmul', a, c), b)),
(('imul(is_used_once)', ('imul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('imul', ('imul', a, c), b)),
(('~ffma', ('fmul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c', d), ('ffma', ('fmul', a, c), b, d)),
+ (('~ffmaz', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c', d), ('ffmaz', ('fmulz', a, c), b, d)),
+ (('~ffma', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_finite_not_zero)', d), ('ffmaz', ('fmul', a, c), b, d)),
# Prefer moving out a multiplication for more MAD/FMA-friendly code
(('~fadd(is_used_once)', ('fadd(is_used_once)', 'a(is_not_const)', 'b(is_fmul)'), '#c'), ('fadd', ('fadd', a, c), b)),
(('~fadd(is_used_once)', ('fadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fadd', ('fadd', a, c), b)),
(('~fadd(is_used_once)', ('ffma(is_used_once)', 'a(is_not_const)', b, 'c(is_not_const)'), '#d'), ('fadd', ('ffma', a, b, d), c)),
+ (('~fadd(is_used_once)', ('ffmaz(is_used_once)', 'a(is_not_const)', b, 'c(is_not_const)'), '#d'), ('fadd', ('ffmaz', a, b, d), c)),
(('iadd(is_used_once)', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('iadd', ('iadd', a, c), b)),
# Reassociate constants in add/mul chains so they can be folded together.
# For now, we mostly only handle cases where the constants are separated by
# a single non-constant. We could do better eventually.
(('~fmul', '#a', ('fmul', 'b(is_not_const)', '#c')), ('fmul', ('fmul', a, c), b)),
+ (('~fmulz', '#a', ('fmulz', 'b(is_not_const)', '#c')), ('fmulz', ('fmulz', a, c), b)),
+ (('~fmul', '#a(is_finite_not_zero)', ('fmulz', 'b(is_not_const)', '#c')), ('fmulz', ('fmul', a, c), b)),
(('~ffma', '#a', ('fmul', 'b(is_not_const)', '#c'), d), ('ffma', ('fmul', a, c), b, d)),
+ (('~ffmaz', '#a', ('fmulz', 'b(is_not_const)', '#c'), d), ('ffmaz', ('fmulz', a, c), b, d)),
+ (('~ffmaz', '#a(is_finite_not_zero)', ('fmulz', 'b(is_not_const)', '#c'), d), ('ffmaz', ('fmul', a, c), b, d)),
(('imul', '#a', ('imul', 'b(is_not_const)', '#c')), ('imul', ('imul', a, c), b)),
(('~fadd', '#a', ('fadd', 'b(is_not_const)', '#c')), ('fadd', ('fadd', a, c), b)),
(('~fadd', '#a', ('fneg', ('fadd', 'b(is_not_const)', '#c'))), ('fadd', ('fadd', a, ('fneg', c)), ('fneg', b))),
(('~fadd', '#a', ('ffma', 'b(is_not_const)', 'c(is_not_const)', '#d')), ('ffma', b, c, ('fadd', a, d))),
(('~fadd', '#a', ('fneg', ('ffma', 'b(is_not_const)', 'c(is_not_const)', '#d'))), ('ffma', ('fneg', b), c, ('fadd', a, ('fneg', d)))),
+ (('~fadd', '#a', ('ffmaz', 'b(is_not_const)', 'c(is_not_const)', '#d')), ('ffmaz', b, c, ('fadd', a, d))),
+ (('~fadd', '#a', ('fneg', ('ffmaz', 'b(is_not_const)', 'c(is_not_const)', '#d'))), ('ffmaz', ('fneg', b), c, ('fadd', a, ('fneg', d)))),
(('iadd', '#a', ('iadd', 'b(is_not_const)', '#c')), ('iadd', ('iadd', a, c), b)),
(('iand', '#a', ('iand', 'b(is_not_const)', '#c')), ('iand', ('iand', a, c), b)),
(('ior', '#a', ('ior', 'b(is_not_const)', '#c')), ('ior', ('ior', a, c), b)),
@@ -1593,6 +1626,8 @@ optimizations.extend([
(('~fmul', ('bcsel(is_used_once)', c, -1.0, 1.0), b), ('bcsel', c, ('fneg', b), b)),
(('~fmul', ('bcsel(is_used_once)', c, 1.0, -1.0), b), ('bcsel', c, b, ('fneg', b))),
+ (('~fmulz', ('bcsel(is_used_once)', c, -1.0, 1.0), b), ('bcsel', c, ('fneg', b), b)),
+ (('~fmulz', ('bcsel(is_used_once)', c, 1.0, -1.0), b), ('bcsel', c, b, ('fneg', b))),
(('~bcsel', ('flt', a, 0.0), ('fneg', a), a), ('fabs', a)),
(('bcsel', a, ('bcsel', b, c, d), d), ('bcsel', ('iand', a, b), c, d)),
@@ -2297,7 +2332,7 @@ for op in ['flt', 'fge', 'ilt', 'ige', 'ult', 'uge']:
# mix(1, (a-1)+1, condition)
#
# Other optimizations will rearrange the constants.
-for op in ['fadd', 'fmul', 'iadd', 'imul']:
+for op in ['fadd', 'fmul', 'fmulz', 'iadd', 'imul']:
optimizations += [
((op, ('bcsel(is_used_once)', a, '#b', c), '#d'), ('bcsel', a, (op, b, d), (op, c, d)))
]
@@ -2339,7 +2374,7 @@ for op in ['flrp']:
(('bcsel', a, (op, b, c, d), (op + '(is_used_once)', e, c, d)), (op, ('bcsel', a, b, e), c, d)),
]
-for op in ['fmul', 'iadd', 'imul', 'iand', 'ior', 'ixor', 'fmin', 'fmax', 'imin', 'imax', 'umin', 'umax']:
+for op in ['fmulz', 'fmul', 'iadd', 'imul', 'iand', 'ior', 'ixor', 'fmin', 'fmax', 'imin', 'imax', 'umin', 'umax']:
optimizations += [
(('bcsel', a, (op + '(is_used_once)', b, c), (op, b, 'd(is_not_const)')), (op, b, ('bcsel', a, c, d))),
(('bcsel', a, (op + '(is_used_once)', b, 'c(is_not_const)'), (op, b, d)), (op, b, ('bcsel', a, c, d))),
@@ -2622,6 +2657,13 @@ late_optimizations = [
(('~fadd', ('fneg', ('ffma(is_used_once)', a, b, ('ffma', c, d, ('fmul(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)')))), 'g(is_not_const)'),
('ffma', ('fneg', a), b, ('ffma', ('fneg', c), d, ('ffma', ('fneg', e), 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
+ (('~fadd', ('ffmaz(is_used_once)', a, b, ('ffmaz', c, d, ('fmulz(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)'))), 'g(is_not_const)'),
+ ('ffmaz', a, b, ('ffmaz', c, d, ('ffmaz', e, 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
+ (('~fadd', ('ffmaz(is_used_once)', a, b, ('fmulz(is_used_once)', 'c(is_not_const_and_not_fsign)', 'd(is_not_const_and_not_fsign)') ), 'e(is_not_const)'),
+ ('ffmaz', a, b, ('ffmaz', c, d, e)), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
+ (('~fadd', ('fneg', ('ffmaz(is_used_once)', a, b, ('ffmaz', c, d, ('fmulz(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)')))), 'g(is_not_const)'),
+ ('ffmaz', ('fneg', a), b, ('ffmaz', ('fneg', c), d, ('ffmaz', ('fneg', e), 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
+
# Section 8.8 (Integer Functions) of the GLSL 4.60 spec says:
#
# If bits is zero, the result will be zero.
@@ -2716,7 +2758,7 @@ for op in ['fadd']:
(('bcsel', a, (op, b, c), (op + '(is_used_once)', b, d)), (op, b, ('bcsel', a, c, d))),
]
-for op in ['ffma']:
+for op in ['ffma', 'ffmaz']:
late_optimizations += [
(('bcsel', a, (op + '(is_used_once)', b, c, d), (op, b, c, e)), (op, b, c, ('bcsel', a, d, e))),
(('bcsel', a, (op, b, c, d), (op + '(is_used_once)', b, c, e)), (op, b, c, ('bcsel', a, d, e))),
diff --git a/src/compiler/nir/nir_range_analysis.c b/src/compiler/nir/nir_range_analysis.c
index 8cdefb8fe3d..24e3af14afe 100644
--- a/src/compiler/nir/nir_range_analysis.c
+++ b/src/compiler/nir/nir_range_analysis.c
@@ -858,7 +858,8 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
break;
}
- case nir_op_fmul: {
+ case nir_op_fmul:
+ case nir_op_fmulz: {
const struct ssa_result_range left =
analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
const struct ssa_result_range right =
@@ -878,14 +879,19 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
} else
r.range = fmul_table[left.range][right.range];
- /* Mulitpliation produces NaN for X * NaN and for 0 * ±Inf. If both
- * operands are numbers and either both are finite or one is finite and
- * the other cannot be zero, then the result must be a number.
- */
- r.is_a_number = (left.is_a_number && right.is_a_number) &&
- ((left.is_finite && right.is_finite) ||
- (!is_not_zero(left.range) && right.is_finite) ||
- (left.is_finite && !is_not_zero(right.range)));
+ if (alu->op == nir_op_fmul) {
+ /* Mulitpliation produces NaN for X * NaN and for 0 * ±Inf. If both
+ * operands are numbers and either both are finite or one is finite and
+ * the other cannot be zero, then the result must be a number.
+ */
+ r.is_a_number = (left.is_a_number && right.is_a_number) &&
+ ((left.is_finite && right.is_finite) ||
+ (!is_not_zero(left.range) && right.is_finite) ||
+ (left.is_finite && !is_not_zero(right.range)));
+ } else {
+ /* nir_op_fmulz: unlike nir_op_fmul, 0 * ±Inf is a number. */
+ r.is_a_number = left.is_a_number && right.is_a_number;
+ }
break;
}
@@ -1466,6 +1472,7 @@ nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
case nir_op_ubfe:
case nir_op_bfm:
case nir_op_fmul:
+ case nir_op_fmulz:
case nir_op_extract_u8:
case nir_op_extract_i8:
case nir_op_extract_u16:
@@ -1584,6 +1591,7 @@ nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
}
break;
case nir_op_fmul:
+ case nir_op_fmulz:
/* infinity/NaN starts at 0x7f800000u, negative numbers at 0x80000000 */
if (src0 < 0x7f800000u && src1 < 0x7f800000u) {
float src0_f, src1_f;
diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h
index 2030dff6957..c35253f36a6 100644
--- a/src/compiler/nir/nir_search_helpers.h
+++ b/src/compiler/nir/nir_search_helpers.h
@@ -288,7 +288,7 @@ is_not_fmul(struct hash_table *ht, const nir_alu_instr *instr, unsigned src,
if (src_alu->op == nir_op_fneg)
return is_not_fmul(ht, src_alu, 0, 0, NULL);
- return src_alu->op != nir_op_fmul;
+ return src_alu->op != nir_op_fmul && src_alu->op != nir_op_fmulz;
}
static inline bool
@@ -304,7 +304,7 @@ is_fmul(struct hash_table *ht, const nir_alu_instr *instr, unsigned src,
if (src_alu->op == nir_op_fneg)
return is_fmul(ht, src_alu, 0, 0, NULL);
- return src_alu->op == nir_op_fmul;
+ return src_alu->op == nir_op_fmul || src_alu->op == nir_op_fmulz;
}
static inline bool
@@ -499,6 +499,17 @@ is_finite(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
return v.is_finite;
}
+static inline bool
+is_finite_not_zero(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
+ unsigned src, UNUSED unsigned num_components,
+ UNUSED const uint8_t *swizzle)
+{
+ const struct ssa_result_range v = nir_analyze_range(ht, instr, src);
+
+ return v.is_finite &&
+ (v.range == lt_zero || v.range == gt_zero || v.range == ne_zero);
+}
+
#define RELATION(r) \
static inline bool \