summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIan Romanick <ian.d.romanick@intel.com>2019-08-06 13:11:56 -0700
committerIan Romanick <ian.d.romanick@intel.com>2019-08-14 11:15:37 -0700
commit0e6581b87dc4e168fa864cd39e8947a58999189a (patch)
treef5c1132711f4c3b525e43510abb9898c368f1ea9
parent73aaeac0a3a46e441953ef6beab665af18180eec (diff)
nir/algebraic: Reassociate shift-by-constant of shift-by-constant
v2: After some review discussion with Alyssa, the replacements now correctly account for cases where (b+c) >= bitsize. v3: Use a temporary to simplify the Python code quite a bit. Suggested by Jason. Haswell and all Gen8+ platforms had similar results. (Ice Lake shown) total instructions in shared programs: 16251155 -> 16249576 (<.01%) instructions in affected programs: 232627 -> 231048 (-0.68%) helped: 547 HURT: 1 helped stats (abs) min: 1 max: 15 x̄: 2.89 x̃: 3 helped stats (rel) min: 0.04% max: 7.84% x̄: 1.14% x̃: 1.06% HURT stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2 HURT stats (rel) min: 0.12% max: 0.12% x̄: 0.12% x̃: 0.12% 95% mean confidence interval for instructions value: -3.12 -2.65 95% mean confidence interval for instructions %-change: -1.20% -1.06% Instructions are helped. total cycles in shared programs: 365924392 -> 365372103 (-0.15%) cycles in affected programs: 59207053 -> 58654764 (-0.93%) helped: 497 HURT: 34 helped stats (abs) min: 1 max: 29300 x̄: 1118.16 x̃: 16 helped stats (rel) min: <.01% max: 10.59% x̄: 1.82% x̃: 1.82% HURT stats (abs) min: 2 max: 424 x̄: 101.03 x̃: 63 HURT stats (rel) min: 0.07% max: 46.17% x̄: 4.72% x̃: 2.06% 95% mean confidence interval for cycles value: -1426.41 -653.77 95% mean confidence interval for cycles %-change: -1.66% -1.15% Cycles are helped. total spills in shared programs: 8870 -> 8871 (0.01%) spills in affected programs: 104 -> 105 (0.96%) helped: 0 HURT: 1 Ivy Bridge and all pre-Gen7 platforms had similar results. (Ivy Bridge shown) total instructions in shared programs: 11956236 -> 11955635 (<.01%) instructions in affected programs: 94110 -> 93509 (-0.64%) helped: 106 HURT: 0 helped stats (abs) min: 1 max: 14 x̄: 5.67 x̃: 4 helped stats (rel) min: 0.12% max: 4.71% x̄: 1.96% x̃: 0.76% 95% mean confidence interval for instructions value: -6.62 -4.72 95% mean confidence interval for instructions %-change: -2.27% -1.64% Instructions are helped. 
total cycles in shared programs: 179296340 -> 178788044 (-0.28%) cycles in affected programs: 51009603 -> 50501307 (-1.00%) helped: 82 HURT: 7 helped stats (abs) min: 5 max: 27820 x̄: 6199.00 x̃: 16 helped stats (rel) min: 0.30% max: 8.16% x̄: 2.58% x̃: 3.11% HURT stats (abs) min: 2 max: 8 x̄: 3.14 x̃: 2 HURT stats (rel) min: 0.02% max: 1.40% x̄: 0.34% x̃: 0.10% 95% mean confidence interval for cycles value: -7649.38 -3773.00 95% mean confidence interval for cycles %-change: -2.71% -1.99% Cycles are helped. Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> [v2] Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
-rw-r--r--src/compiler/nir/nir_opt_algebraic.py26
1 files changed, 25 insertions, 1 deletions
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index bf5c69314c5..40f718e5ae2 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -224,7 +224,31 @@ optimizations = [
# (a * #b) << #c
# a * (#b << #c)
(('ishl', ('imul', a, '#b'), '#c'), ('imul', a, ('ishl', b, c))),
+]
+
+# Care must be taken here.  Shifts in NIR use only the lower log2(bitsize)
+# bits of the second source, so each count is reduced with (bitsize - 1).  The
+# replacements must handle the case (b % bitsize) + (c % bitsize) >= bitsize.
+for s in [8, 16, 32, 64]:
+    mask = s - 1
+
+    ishl = "ishl@{}".format(s)
+    ishr = "ishr@{}".format(s)
+    ushr = "ushr@{}".format(s)
+
+    in_bounds = ('ult', ('iadd', ('iand', b, mask), ('iand', c, mask)), s)
+
+    optimizations.extend([
+        ((ishl, (ishl, a, '#b'), '#c'), ('bcsel', in_bounds, (ishl, a, ('iadd', b, c)), 0)),
+        ((ushr, (ushr, a, '#b'), '#c'), ('bcsel', in_bounds, (ushr, a, ('iadd', b, c)), 0)),
+        # To get -1 for large shifts of negative values, ishr must instead
+        # clamp the sum of the masked shift counts to the maximum value.
+        ((ishr, (ishr, a, '#b'), '#c'),
+         (ishr, a, ('imin', ('iadd', ('iand', b, mask), ('iand', c, mask)), s - 1))),
+    ])
+
+optimizations.extend([
# This is common for address calculations. Reassociating may enable the
# 'a<<c' to be CSE'd. It also helps architectures that have an ISHLADD
# instruction or a constant offset field for in load / store instructions.
@@ -859,7 +883,7 @@ optimizations = [
(('ishr', 'a@32', 24), ('extract_i8', a, 3), '!options->lower_extract_byte'),
(('ishr', 'a@64', 56), ('extract_i8', a, 7), '!options->lower_extract_byte'),
(('iand', 0xff, a), ('extract_u8', a, 0), '!options->lower_extract_byte')
-]
+])
# After the ('extract_u8', a, 0) pattern, above, triggers, there will be
# patterns like those below.