summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Emma Anholt <emma@anholt.net> 2021-12-14 14:35:03 -0800
committer: Marge Bot <emma+marge@anholt.net> 2022-01-07 09:58:24 +0000
commit: 558a6006299544ee5f77843f094015c62558f4ad (patch)
tree: f288886e9a2c3bdb2ed6c4ce2d1632335b8382fe
parent: 85d7d520b993579ebebaa1c279e7d93015223d8f (diff)
nir_to_tgsi: Enable fdot_replicates flag.
That's how the TGSI math opcodes work. This lets lower_vec_to_regs coalesce
the DP output into the .yzw channels, giving an impressive shader-db win on
softpipe:

total instructions in shared programs: 2929840 -> 2794036 (-4.64%)
instructions in affected programs: 1651438 -> 1515634 (-8.22%)
total temps in shared programs: 372730 -> 332744 (-10.73%)
temps in affected programs: 118151 -> 78165 (-33.84%)

and a minor one on r300:

total instructions in shared programs: 51238 -> 51149 (-0.17%)
instructions in affected programs: 2621 -> 2532 (-3.40%)
total vinst in shared programs: 15655 -> 15618 (-0.24%)
vinst in affected programs: 468 -> 431 (-7.91%)
total temps in shared programs: 9838 -> 9828 (-0.10%)
temps in affected programs: 59 -> 49 (-16.95%)

and a bigger one on i915g:

total instructions in shared programs: 398064 -> 395901 (-0.54%)
instructions in affected programs: 29271 -> 27108 (-7.39%)
total tex_indirect in shared programs: 12261 -> 12233 (-0.23%)
tex_indirect in affected programs: 98 -> 70 (-28.57%)
LOST: 0
GAINED: 5

The r300 change is less impressive because it does some backend copy-prop,
but also because intermediate storage of DPs now takes a vec4 instead of a
scalar.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14200>
-rw-r--r--  src/compiler/nir/nir_builder_opcodes_h.py  5
-rw-r--r--  src/gallium/auxiliary/nir/nir_to_tgsi.c  4
-rw-r--r--  src/gallium/drivers/i915/i915_screen.c  2
-rw-r--r--  src/gallium/drivers/r300/r300_screen.c  4
-rw-r--r--  src/gallium/drivers/softpipe/sp_screen.c  1
5 files changed, 16 insertions, 0 deletions
diff --git a/src/compiler/nir/nir_builder_opcodes_h.py b/src/compiler/nir/nir_builder_opcodes_h.py
index 35e5ca7a506..7fc6af9c776 100644
--- a/src/compiler/nir/nir_builder_opcodes_h.py
+++ b/src/compiler/nir/nir_builder_opcodes_h.py
@@ -30,9 +30,13 @@ def src_decl_list(num_srcs):
def src_list(num_srcs):
return ', '.join('src' + str(i) for i in range(num_srcs))
+
+def needs_num_components(opcode):
+ return "replicated" in opcode.name
%>
% for name, opcode in sorted(opcodes.items()):
+% if not needs_num_components(opcode):
static inline nir_ssa_def *
nir_${name}(nir_builder *build, ${src_decl_list(opcode.num_inputs)})
{
@@ -43,6 +47,7 @@ nir_${name}(nir_builder *build, ${src_decl_list(opcode.num_inputs)})
return nir_build_alu_src_arr(build, nir_op_${name}, srcs);
% endif
}
+% endif
% endfor
% for name, opcode in sorted(INTR_OPCODES.items()):
diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c
index 5ad01306fbd..e5097d78cac 100644
--- a/src/gallium/auxiliary/nir/nir_to_tgsi.c
+++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c
@@ -858,6 +858,9 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
[nir_op_fdot2] = { TGSI_OPCODE_DP2 },
[nir_op_fdot3] = { TGSI_OPCODE_DP3 },
[nir_op_fdot4] = { TGSI_OPCODE_DP4 },
+ [nir_op_fdot2_replicated] = { TGSI_OPCODE_DP2 },
+ [nir_op_fdot3_replicated] = { TGSI_OPCODE_DP3 },
+ [nir_op_fdot4_replicated] = { TGSI_OPCODE_DP4 },
[nir_op_ffloor] = { TGSI_OPCODE_FLR, TGSI_OPCODE_DFLR },
[nir_op_ffract] = { TGSI_OPCODE_FRC, TGSI_OPCODE_DFRAC },
[nir_op_fceil] = { TGSI_OPCODE_CEIL, TGSI_OPCODE_DCEIL },
@@ -3191,6 +3194,7 @@ nir_to_tgsi(struct nir_shader *s,
}
static const nir_shader_compiler_options nir_to_tgsi_compiler_options = {
+ .fdot_replicates = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.lower_extract_byte = true,
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index d2ab19c4cda..75d57e1cbce 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -106,6 +106,7 @@ i915_get_name(struct pipe_screen *screen)
}
static const nir_shader_compiler_options i915_compiler_options = {
+ .fdot_replicates = true,
.fuse_ffma32 = true,
.lower_bitops = true, /* required for !CAP_INTEGERS nir_to_tgsi */
.lower_extract_byte = true,
@@ -122,6 +123,7 @@ static const nir_shader_compiler_options i915_compiler_options = {
};
static const struct nir_shader_compiler_options gallivm_nir_options = {
+ .fdot_replicates = true,
.lower_bitops = true, /* required for !CAP_INTEGERS nir_to_tgsi */
.lower_scmp = true,
.lower_flrp32 = true,
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 27c6835e339..8d4f902722f 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -474,6 +474,7 @@ static int r300_get_video_param(struct pipe_screen *screen,
}
static const nir_shader_compiler_options r500_vs_compiler_options = {
+ .fdot_replicates = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.lower_bitops = true,
@@ -499,6 +500,7 @@ static const nir_shader_compiler_options r500_vs_compiler_options = {
};
static const nir_shader_compiler_options r500_fs_compiler_options = {
+ .fdot_replicates = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.lower_bitops = true,
@@ -525,6 +527,7 @@ static const nir_shader_compiler_options r500_fs_compiler_options = {
};
static const nir_shader_compiler_options r300_vs_compiler_options = {
+ .fdot_replicates = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.lower_bitops = true,
@@ -549,6 +552,7 @@ static const nir_shader_compiler_options r300_vs_compiler_options = {
};
static const nir_shader_compiler_options r300_fs_compiler_options = {
+ .fdot_replicates = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.lower_bitops = true,
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index c87d4067cfc..4984f608011 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -75,6 +75,7 @@ softpipe_get_name(struct pipe_screen *screen)
}
static const nir_shader_compiler_options sp_compiler_options = {
+ .fdot_replicates = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.lower_extract_byte = true,