diff options
author | Emma Anholt <emma@anholt.net> | 2021-12-14 14:35:03 -0800 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2022-01-07 09:58:24 +0000 |
commit | 558a6006299544ee5f77843f094015c62558f4ad (patch) | |
tree | f288886e9a2c3bdb2ed6c4ce2d1632335b8382fe | |
parent | 85d7d520b993579ebebaa1c279e7d93015223d8f (diff) |
nir_to_tgsi: Enable fdot_replicates flag.
That's how the TGSI math opcodes work: DP2/DP3/DP4 replicate their scalar result across all destination channels.
This lets lower_vec_to_regs coalesce the DP output into the .yzw channels,
giving an impressive shader-db win on softpipe:
total instructions in shared programs: 2929840 -> 2794036 (-4.64%)
instructions in affected programs: 1651438 -> 1515634 (-8.22%)
total temps in shared programs: 372730 -> 332744 (-10.73%)
temps in affected programs: 118151 -> 78165 (-33.84%)
and a minor one on r300:
total instructions in shared programs: 51238 -> 51149 (-0.17%)
instructions in affected programs: 2621 -> 2532 (-3.40%)
total vinst in shared programs: 15655 -> 15618 (-0.24%)
vinst in affected programs: 468 -> 431 (-7.91%)
total temps in shared programs: 9838 -> 9828 (-0.10%)
temps in affected programs: 59 -> 49 (-16.95%)
and a bigger one on i915g:
total instructions in shared programs: 398064 -> 395901 (-0.54%)
instructions in affected programs: 29271 -> 27108 (-7.39%)
total tex_indirect in shared programs: 12261 -> 12233 (-0.23%)
tex_indirect in affected programs: 98 -> 70 (-28.57%)
LOST: 0
GAINED: 5
The r300 change is less impressive, partly because its backend already does
some copy-prop and partly because intermediate storage of DPs now takes a vec4
instead of a scalar.
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14200>
-rw-r--r-- | src/compiler/nir/nir_builder_opcodes_h.py | 5 | ||||
-rw-r--r-- | src/gallium/auxiliary/nir/nir_to_tgsi.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/i915/i915_screen.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_screen.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/softpipe/sp_screen.c | 1 |
5 files changed, 16 insertions, 0 deletions
diff --git a/src/compiler/nir/nir_builder_opcodes_h.py b/src/compiler/nir/nir_builder_opcodes_h.py index 35e5ca7a506..7fc6af9c776 100644 --- a/src/compiler/nir/nir_builder_opcodes_h.py +++ b/src/compiler/nir/nir_builder_opcodes_h.py @@ -30,9 +30,13 @@ def src_decl_list(num_srcs): def src_list(num_srcs): return ', '.join('src' + str(i) for i in range(num_srcs)) + +def needs_num_components(opcode): + return "replicated" in opcode.name %> % for name, opcode in sorted(opcodes.items()): +% if not needs_num_components(opcode): static inline nir_ssa_def * nir_${name}(nir_builder *build, ${src_decl_list(opcode.num_inputs)}) { @@ -43,6 +47,7 @@ nir_${name}(nir_builder *build, ${src_decl_list(opcode.num_inputs)}) return nir_build_alu_src_arr(build, nir_op_${name}, srcs); % endif } +% endif % endfor % for name, opcode in sorted(INTR_OPCODES.items()): diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c index 5ad01306fbd..e5097d78cac 100644 --- a/src/gallium/auxiliary/nir/nir_to_tgsi.c +++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c @@ -858,6 +858,9 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr) [nir_op_fdot2] = { TGSI_OPCODE_DP2 }, [nir_op_fdot3] = { TGSI_OPCODE_DP3 }, [nir_op_fdot4] = { TGSI_OPCODE_DP4 }, + [nir_op_fdot2_replicated] = { TGSI_OPCODE_DP2 }, + [nir_op_fdot3_replicated] = { TGSI_OPCODE_DP3 }, + [nir_op_fdot4_replicated] = { TGSI_OPCODE_DP4 }, [nir_op_ffloor] = { TGSI_OPCODE_FLR, TGSI_OPCODE_DFLR }, [nir_op_ffract] = { TGSI_OPCODE_FRC, TGSI_OPCODE_DFRAC }, [nir_op_fceil] = { TGSI_OPCODE_CEIL, TGSI_OPCODE_DCEIL }, @@ -3191,6 +3194,7 @@ nir_to_tgsi(struct nir_shader *s, } static const nir_shader_compiler_options nir_to_tgsi_compiler_options = { + .fdot_replicates = true, .fuse_ffma32 = true, .fuse_ffma64 = true, .lower_extract_byte = true, diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index d2ab19c4cda..75d57e1cbce 100644 --- a/src/gallium/drivers/i915/i915_screen.c 
+++ b/src/gallium/drivers/i915/i915_screen.c @@ -106,6 +106,7 @@ i915_get_name(struct pipe_screen *screen) } static const nir_shader_compiler_options i915_compiler_options = { + .fdot_replicates = true, .fuse_ffma32 = true, .lower_bitops = true, /* required for !CAP_INTEGERS nir_to_tgsi */ .lower_extract_byte = true, @@ -122,6 +123,7 @@ static const nir_shader_compiler_options i915_compiler_options = { }; static const struct nir_shader_compiler_options gallivm_nir_options = { + .fdot_replicates = true, .lower_bitops = true, /* required for !CAP_INTEGERS nir_to_tgsi */ .lower_scmp = true, .lower_flrp32 = true, diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 27c6835e339..8d4f902722f 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -474,6 +474,7 @@ static int r300_get_video_param(struct pipe_screen *screen, } static const nir_shader_compiler_options r500_vs_compiler_options = { + .fdot_replicates = true, .fuse_ffma32 = true, .fuse_ffma64 = true, .lower_bitops = true, @@ -499,6 +500,7 @@ static const nir_shader_compiler_options r500_vs_compiler_options = { }; static const nir_shader_compiler_options r500_fs_compiler_options = { + .fdot_replicates = true, .fuse_ffma32 = true, .fuse_ffma64 = true, .lower_bitops = true, @@ -525,6 +527,7 @@ static const nir_shader_compiler_options r500_fs_compiler_options = { }; static const nir_shader_compiler_options r300_vs_compiler_options = { + .fdot_replicates = true, .fuse_ffma32 = true, .fuse_ffma64 = true, .lower_bitops = true, @@ -549,6 +552,7 @@ static const nir_shader_compiler_options r300_vs_compiler_options = { }; static const nir_shader_compiler_options r300_fs_compiler_options = { + .fdot_replicates = true, .fuse_ffma32 = true, .fuse_ffma64 = true, .lower_bitops = true, diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index c87d4067cfc..4984f608011 100644 --- 
a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -75,6 +75,7 @@ softpipe_get_name(struct pipe_screen *screen) } static const nir_shader_compiler_options sp_compiler_options = { + .fdot_replicates = true, .fuse_ffma32 = true, .fuse_ffma64 = true, .lower_extract_byte = true, |