summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarek Olšák <marek.olsak@amd.com>2022-06-06 05:27:14 -0400
committerMarge Bot <emma+marge@anholt.net>2022-06-11 11:14:16 +0000
commitdfa8dcf80e44b66311513bbb80074da25722c5e3 (patch)
tree858da834db83e43eb6e343c8c5751cd3aaea910b
parentdbbbe73d050920cc149bf99be912221a30920750 (diff)
radeonsi: remove streamout code from shaders if no streamout buffers are bound
This is an optimization using asynchronous shader compilation. Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16885>
-rw-r--r--src/gallium/drivers/radeonsi/gfx10_shader_ngg.c4
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c4
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.h8
-rw-r--r--src/gallium/drivers/radeonsi/si_shader_llvm.c2
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.cpp9
-rw-r--r--src/gallium/drivers/radeonsi/si_state_streamout.c6
6 files changed, 24 insertions, 9 deletions
diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index da783a7f6e1..8993051965b 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -615,7 +615,7 @@ static unsigned ngg_nogs_vertex_size(struct si_shader *shader)
/* The edgeflag is always stored in the last element that's also
* used for padding to reduce LDS bank conflicts. */
- if (shader->selector->info.enabled_streamout_buffer_mask)
+ if (si_shader_uses_streamout(shader))
lds_vertex_size = 4 * shader->selector->info.num_outputs + 1;
if (gfx10_ngg_writes_user_edgeflags(shader))
lds_vertex_size = MAX2(lds_vertex_size, 1);
@@ -2248,7 +2248,7 @@ unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader)
{
const struct si_shader_selector *sel = shader->selector;
- if (sel->stage == MESA_SHADER_GEOMETRY && sel->info.enabled_streamout_buffer_mask)
+ if (sel->stage == MESA_SHADER_GEOMETRY && si_shader_uses_streamout(shader))
return 44;
return 8;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index e3961f47aff..0fb7abd37a3 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1760,7 +1760,7 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
}
struct pipe_stream_output_info so = {};
- if (sel->info.enabled_streamout_buffer_mask)
+ if (si_shader_uses_streamout(shader))
nir_gather_stream_output_info(nir, &so);
/* Dump NIR before doing NIR->LLVM conversion in case the
@@ -2501,7 +2501,7 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler
shader->uses_gs_state_outprim = sscreen->use_ngg &&
/* Only used by streamout in vertex shaders. */
sel->stage == MESA_SHADER_VERTEX &&
- sel->info.enabled_streamout_buffer_mask;
+ si_shader_uses_streamout(shader);
if (sel->stage == MESA_SHADER_VERTEX) {
shader->uses_base_instance = sel->info.uses_base_instance ||
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 5fa538b87e0..2eee541dfc1 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -698,6 +698,7 @@ struct si_shader_key_ge {
uint64_t kill_outputs; /* "get_unique_index" bits */
unsigned kill_clip_distances : 8;
unsigned kill_pointsize : 1;
+ unsigned remove_streamout : 1;
/* For NGG VS and TES. */
unsigned ngg_culling : 13; /* SI_NGG_CULL_* */
@@ -1045,6 +1046,13 @@ static inline bool gfx10_ngg_writes_user_edgeflags(struct si_shader *shader)
shader->selector->info.writes_edgeflag;
}
+static inline bool si_shader_uses_streamout(struct si_shader *shader)
+{
+ return shader->selector->stage <= MESA_SHADER_GEOMETRY &&
+ shader->selector->info.enabled_streamout_buffer_mask &&
+ !shader->key.ge.opt.remove_streamout;
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c
index c383d6dbd78..245ca5c4638 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c
@@ -189,7 +189,7 @@ void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTy
}
if (ctx->stage <= MESA_SHADER_GEOMETRY && ctx->shader->key.ge.as_ngg &&
- ctx->shader->selector->info.enabled_streamout_buffer_mask)
+ si_shader_uses_streamout(ctx->shader))
ac_llvm_add_target_dep_function_attr(ctx->main_fn, "amdgpu-gds-size", 256);
ac_llvm_set_workgroup_size(ctx->main_fn, max_workgroup_size);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
index 8e8e364098f..fc2bbb727f9 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
@@ -1554,7 +1554,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
}
shader->ctx_reg.ngg.vgt_stages.u.ngg = 1;
- shader->ctx_reg.ngg.vgt_stages.u.streamout = !!gs_sel->info.enabled_streamout_buffer_mask;
+ shader->ctx_reg.ngg.vgt_stages.u.streamout = si_shader_uses_streamout(shader);
shader->ctx_reg.ngg.vgt_stages.u.ngg_passthrough = gfx10_is_ngg_passthrough(shader);
shader->ctx_reg.ngg.vgt_stages.u.gs_wave32 = shader->wave_size == 32;
}
@@ -1745,12 +1745,12 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
if (sscreen->info.gfx_level <= GFX9)
rsrc1 |= S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8);
- if (!sscreen->use_ngg_streamout) {
+ if (!sscreen->use_ngg_streamout && si_shader_uses_streamout(shader)) {
rsrc2 |= S_00B12C_SO_BASE0_EN(!!shader->selector->info.base.xfb_stride[0]) |
S_00B12C_SO_BASE1_EN(!!shader->selector->info.base.xfb_stride[1]) |
S_00B12C_SO_BASE2_EN(!!shader->selector->info.base.xfb_stride[2]) |
S_00B12C_SO_BASE3_EN(!!shader->selector->info.base.xfb_stride[3]) |
- S_00B12C_SO_EN(!!info->enabled_streamout_buffer_mask);
+ S_00B12C_SO_EN(1);
}
si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS, rsrc1);
@@ -2216,6 +2216,8 @@ static void si_get_vs_key_outputs(struct si_context *sctx, struct si_shader_sele
key->ge.opt.kill_pointsize = vs->info.writes_psize &&
sctx->current_rast_prim != PIPE_PRIM_POINTS &&
!sctx->queued.named.rasterizer->polygon_mode_is_points;
+ key->ge.opt.remove_streamout = vs->info.enabled_streamout_buffer_mask &&
+ !sctx->streamout.enabled_mask;
}
static void si_clear_vs_key_outputs(struct si_context *sctx, struct si_shader_selector *vs,
@@ -2223,6 +2225,7 @@ static void si_clear_vs_key_outputs(struct si_context *sctx, struct si_shader_se
{
key->ge.opt.kill_clip_distances = 0;
key->ge.opt.kill_outputs = 0;
+ key->ge.opt.remove_streamout = 0;
key->ge.opt.ngg_culling = 0;
key->ge.mono.u.vs_export_prim_id = 0;
key->ge.opt.kill_pointsize = 0;
diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c
index 52cb4eccc1f..230dcf00bf9 100644
--- a/src/gallium/drivers/radeonsi/si_state_streamout.c
+++ b/src/gallium/drivers/radeonsi/si_state_streamout.c
@@ -169,7 +169,11 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ
for (; i < sctx->streamout.num_targets; i++)
si_so_target_reference(&sctx->streamout.targets[i], NULL);
- sctx->streamout.enabled_mask = enabled_mask;
+ if (!!sctx->streamout.enabled_mask != !!enabled_mask) {
+ sctx->streamout.enabled_mask = enabled_mask;
+ sctx->do_update_shaders = true; /* to keep/remove streamout shader code as an optimization */
+ }
+
sctx->streamout.num_targets = num_targets;
sctx->streamout.append_bitmask = append_bitmask;