summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIan Romanick <ian.d.romanick@intel.com>2019-06-05 17:23:11 -0700
committerIan Romanick <ian.d.romanick@intel.com>2019-07-11 10:20:03 -0700
commit09705747d727d2ec48a6c4af8206ec3411dec127 (patch)
tree0295be8e86544d666247603148a6aa184ad1d0f5
parentff9f526de33ea1243b8aff1c23915c652d879d07 (diff)
nir/algebraic: Reassociate fadd into fmul in DPH-like pattern
Moving the add to the other end of the sequence allows it to be fused into an FMA. Ice Lake total instructions in shared programs: 17173074 -> 16933147 (-1.40%) instructions in affected programs: 7938745 -> 7698818 (-3.02%) helped: 35583 HURT: 90 helped stats (abs) min: 1 max: 716 x̄: 6.75 x̃: 6 helped stats (rel) min: 0.10% max: 53.04% x̄: 5.29% x̃: 3.45% HURT stats (abs) min: 1 max: 41 x̄: 2.46 x̃: 1 HURT stats (rel) min: 0.32% max: 8.33% x̄: 1.41% x̃: 0.77% 95% mean confidence interval for instructions value: -6.80 -6.65 95% mean confidence interval for instructions %-change: -5.32% -5.22% Instructions are helped. total cycles in shared programs: 360881386 -> 359533568 (-0.37%) cycles in affected programs: 189489144 -> 188141326 (-0.71%) helped: 27250 HURT: 6707 helped stats (abs) min: 1 max: 21997 x̄: 62.15 x̃: 16 helped stats (rel) min: <.01% max: 70.69% x̄: 4.04% x̃: 2.35% HURT stats (abs) min: 1 max: 3507 x̄: 51.56 x̃: 14 HURT stats (rel) min: <.01% max: 77.26% x̄: 2.72% x̃: 1.27% 95% mean confidence interval for cycles value: -44.70 -34.68 95% mean confidence interval for cycles %-change: -2.75% -2.65% Cycles are helped. total spills in shared programs: 8943 -> 8829 (-1.27%) spills in affected programs: 625 -> 511 (-18.24%) helped: 6 HURT: 3 total fills in shared programs: 21815 -> 21719 (-0.44%) fills in affected programs: 1653 -> 1557 (-5.81%) helped: 7 HURT: 10 LOST: 11 GAINED: 3 Skylake and Broadwell had similar results. (Skylake shown) total instructions in shared programs: 15271996 -> 15040882 (-1.51%) instructions in affected programs: 7193699 -> 6962585 (-3.21%) helped: 33985 HURT: 30 helped stats (abs) min: 1 max: 260 x̄: 6.80 x̃: 6 helped stats (rel) min: 0.10% max: 30.00% x̄: 5.54% x̃: 3.85% HURT stats (abs) min: 1 max: 41 x̄: 4.00 x̃: 3 HURT stats (rel) min: 0.20% max: 2.16% x̄: 1.46% x̃: 1.72% 95% mean confidence interval for instructions value: -6.87 -6.72 95% mean confidence interval for instructions %-change: -5.59% -5.48% Instructions are helped. total cycles in shared programs: 355520785 -> 354253799 (-0.36%) cycles in affected programs: 185869148 -> 184602162 (-0.68%) helped: 25824 HURT: 6287 helped stats (abs) min: 1 max: 21997 x̄: 61.66 x̃: 16 helped stats (rel) min: <.01% max: 42.05% x̄: 4.18% x̃: 2.41% HURT stats (abs) min: 1 max: 3327 x̄: 51.76 x̃: 14 HURT stats (rel) min: <.01% max: 101.62% x̄: 2.80% x̃: 1.28% 95% mean confidence interval for cycles value: -44.70 -34.21 95% mean confidence interval for cycles %-change: -2.87% -2.76% Cycles are helped. total spills in shared programs: 8835 -> 8818 (-0.19%) spills in affected programs: 613 -> 596 (-2.77%) helped: 5 HURT: 2 total fills in shared programs: 21738 -> 21744 (0.03%) fills in affected programs: 1348 -> 1354 (0.45%) helped: 5 HURT: 11 LOST: 0 GAINED: 12 Haswell total instructions in shared programs: 13447102 -> 13381508 (-0.49%) instructions in affected programs: 3770735 -> 3705141 (-1.74%) helped: 11999 HURT: 29 helped stats (abs) min: 1 max: 409 x̄: 5.60 x̃: 3 helped stats (rel) min: 0.10% max: 20.00% x̄: 2.38% x̃: 1.87% HURT stats (abs) min: 3 max: 750 x̄: 54.90 x̃: 3 HURT stats (rel) min: 0.12% max: 125.30% x̄: 9.96% x̃: 1.82% 95% mean confidence interval for instructions value: -5.71 -5.19 95% mean confidence interval for instructions %-change: -2.39% -2.30% Instructions are helped. total cycles in shared programs: 376342236 -> 375690458 (-0.17%) cycles in affected programs: 155699021 -> 155047243 (-0.42%) helped: 8397 HURT: 2876 helped stats (abs) min: 1 max: 20248 x̄: 109.87 x̃: 18 helped stats (rel) min: <.01% max: 40.71% x̄: 2.23% x̃: 1.49% HURT stats (abs) min: 1 max: 15414 x̄: 94.15 x̃: 22 HURT stats (rel) min: <.01% max: 432.49% x̄: 3.15% x̃: 1.41% 95% mean confidence interval for cycles value: -67.64 -48.00 95% mean confidence interval for cycles %-change: -0.99% -0.74% Cycles are helped. total spills in shared programs: 23134 -> 23184 (0.22%) spills in affected programs: 1675 -> 1725 (2.99%) helped: 13 HURT: 11 total fills in shared programs: 34550 -> 34686 (0.39%) fills in affected programs: 1421 -> 1557 (9.57%) helped: 13 HURT: 11 LOST: 0 GAINED: 11 Ivy Bridge total instructions in shared programs: 12019642 -> 11987285 (-0.27%) instructions in affected programs: 1532236 -> 1499879 (-2.11%) helped: 5522 HURT: 110 helped stats (abs) min: 1 max: 312 x̄: 6.22 x̃: 3 helped stats (rel) min: 0.16% max: 20.00% x̄: 2.46% x̃: 1.88% HURT stats (abs) min: 1 max: 750 x̄: 18.07 x̃: 3 HURT stats (rel) min: 0.09% max: 125.30% x̄: 3.42% x̃: 1.15% 95% mean confidence interval for instructions value: -6.25 -5.24 95% mean confidence interval for instructions %-change: -2.43% -2.26% Instructions are helped. total cycles in shared programs: 180214667 -> 179761900 (-0.25%) cycles in affected programs: 31448723 -> 30995956 (-1.44%) helped: 7191 HURT: 2838 helped stats (abs) min: 1 max: 17680 x̄: 88.47 x̃: 17 helped stats (rel) min: <.01% max: 50.45% x̄: 2.16% x̃: 1.40% HURT stats (abs) min: 1 max: 15540 x̄: 64.63 x̃: 24 HURT stats (rel) min: 0.02% max: 435.17% x̄: 3.10% x̃: 1.51% 95% mean confidence interval for cycles value: -53.34 -36.95 95% mean confidence interval for cycles %-change: -0.81% -0.53% Cycles are helped. total spills in shared programs: 3599 -> 3642 (1.19%) spills in affected programs: 1180 -> 1223 (3.64%) helped: 12 HURT: 2 total fills in shared programs: 4031 -> 4162 (3.25%) fills in affected programs: 876 -> 1007 (14.95%) helped: 12 HURT: 2 LOST: 6 GAINED: 5 Sandy Bridge total instructions in shared programs: 10850686 -> 10822890 (-0.26%) instructions in affected programs: 1247986 -> 1220190 (-2.23%) helped: 4699 HURT: 102 helped stats (abs) min: 1 max: 104 x̄: 6.02 x̃: 3 helped stats (rel) min: 0.15% max: 17.65% x̄: 2.44% x̃: 1.88% HURT stats (abs) min: 1 max: 16 x̄: 4.70 x̃: 3 HURT stats (rel) min: 0.09% max: 3.85% x̄: 1.11% x̃: 1.10% 95% mean confidence interval for instructions value: -6.10 -5.47 95% mean confidence interval for instructions %-change: -2.42% -2.30% Instructions are helped. total cycles in shared programs: 154044149 -> 153920095 (-0.08%) cycles in affected programs: 26037392 -> 25913338 (-0.48%) helped: 5974 HURT: 2521 helped stats (abs) min: 1 max: 1802 x̄: 35.42 x̃: 16 helped stats (rel) min: <.01% max: 35.80% x̄: 1.43% x̃: 0.84% HURT stats (abs) min: 1 max: 862 x̄: 34.73 x̃: 20 HURT stats (rel) min: 0.01% max: 36.33% x̄: 1.67% x̃: 0.85% 95% mean confidence interval for cycles value: -16.31 -12.90 95% mean confidence interval for cycles %-change: -0.56% -0.45% Cycles are helped. total spills in shared programs: 2876 -> 2957 (2.82%) spills in affected programs: 592 -> 673 (13.68%) helped: 6 HURT: 35 total fills in shared programs: 3157 -> 3134 (-0.73%) fills in affected programs: 402 -> 379 (-5.72%) helped: 6 HURT: 0 LOST: 5 GAINED: 11 Reviewed-by: Matt Turner <mattst88@gmail.com>
-rw-r--r--src/compiler/nir/nir_opt_algebraic.py12
-rw-r--r--src/compiler/nir/nir_search_helpers.h24
2 files changed, 36 insertions, 0 deletions
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index fa940429281..09eaeaf3f23 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -1250,6 +1250,18 @@ late_optimizations = [
(('fmax', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmax', a, b))),
(('bcsel', a, 0, ('b2f32', ('inot', 'b@bool'))), ('b2f32', ('inot', ('ior', a, b)))),
+
+ # Things that look like DPH in the source shader may get expanded to
+ # something that looks like dot(v1.xyz, v2.xyz) + v1.w by the time it gets
+ # to NIR. After FFMA is generated, this can look like:
+ #
+ # fadd(ffma(v1.z, v2.z, ffma(v1.y, v2.y, fmul(v1.x, v2.x))), v1.w)
+ #
+ # Reassociate the last addition into the first multiplication.
+ (('~fadd', ('ffma(is_used_once)', a, b, ('ffma', c, d, ('fmul', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)'))), 'g(is_not_const)'),
+ ('ffma', a, b, ('ffma', c, d, ('ffma', e, 'f', 'g'))), '!options->intel_vec4'),
+ (('~fadd', ('ffma(is_used_once)', a, b, ('fmul', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)') ), 'g(is_not_const)'),
+ ('ffma', a, b, ('ffma', e, 'f', 'g') ), '!options->intel_vec4'),
]
print(nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render())
diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h
index 658ed2b1d5b..7c19cd3fdfe 100644
--- a/src/compiler/nir/nir_search_helpers.h
+++ b/src/compiler/nir/nir_search_helpers.h
@@ -190,6 +190,30 @@ is_not_fmul(nir_alu_instr *instr, unsigned src,
}
static inline bool
+is_fsign(nir_alu_instr *instr, unsigned src,
+ UNUSED unsigned num_components, UNUSED const uint8_t *swizzle)
+{
+ nir_alu_instr *src_alu =
+ nir_src_as_alu_instr(instr->src[src].src);
+
+ if (src_alu == NULL)
+ return false;
+
+ if (src_alu->op == nir_op_fneg)
+ src_alu = nir_src_as_alu_instr(src_alu->src[0].src);
+
+ return src_alu->op == nir_op_fsign;
+}
+
+static inline bool
+is_not_const_and_not_fsign(nir_alu_instr *instr, unsigned src,
+ unsigned num_components, const uint8_t *swizzle)
+{
+ return is_not_const(instr, src, num_components, swizzle) &&
+ !is_fsign(instr, src, num_components, swizzle);
+}
+
+static inline bool
is_used_once(nir_alu_instr *instr)
{
bool zero_if_use = list_empty(&instr->dest.dest.ssa.if_uses);