diff options
author | Marek Olšák <marek.olsak@amd.com> | 2022-06-06 05:27:14 -0400 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2022-06-11 11:14:16 +0000 |
commit | dfa8dcf80e44b66311513bbb80074da25722c5e3 (patch) | |
tree | 858da834db83e43eb6e343c8c5751cd3aaea910b | |
parent | dbbbe73d050920cc149bf99be912221a30920750 (diff) |
radeonsi: remove streamout code from shaders if no streamout buffers are bound
This is an optimization using asynchronous shader compilation.
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16885>
-rw-r--r-- | src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.h | 8 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader_llvm.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_shaders.cpp | 9 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_streamout.c | 6 |
6 files changed, 24 insertions, 9 deletions
diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index da783a7f6e1..8993051965b 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -615,7 +615,7 @@ static unsigned ngg_nogs_vertex_size(struct si_shader *shader) /* The edgeflag is always stored in the last element that's also * used for padding to reduce LDS bank conflicts. */ - if (shader->selector->info.enabled_streamout_buffer_mask) + if (si_shader_uses_streamout(shader)) lds_vertex_size = 4 * shader->selector->info.num_outputs + 1; if (gfx10_ngg_writes_user_edgeflags(shader)) lds_vertex_size = MAX2(lds_vertex_size, 1); @@ -2248,7 +2248,7 @@ unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader) { const struct si_shader_selector *sel = shader->selector; - if (sel->stage == MESA_SHADER_GEOMETRY && sel->info.enabled_streamout_buffer_mask) + if (sel->stage == MESA_SHADER_GEOMETRY && si_shader_uses_streamout(shader)) return 44; return 8; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index e3961f47aff..0fb7abd37a3 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1760,7 +1760,7 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi } struct pipe_stream_output_info so = {}; - if (sel->info.enabled_streamout_buffer_mask) + if (si_shader_uses_streamout(shader)) nir_gather_stream_output_info(nir, &so); /* Dump NIR before doing NIR->LLVM conversion in case the @@ -2501,7 +2501,7 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler shader->uses_gs_state_outprim = sscreen->use_ngg && /* Only used by streamout in vertex shaders. */ sel->stage == MESA_SHADER_VERTEX && - sel->info.enabled_streamout_buffer_mask; + si_shader_uses_streamout(shader); if (sel->stage == MESA_SHADER_VERTEX) { shader->uses_base_instance = sel->info.uses_base_instance || diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 5fa538b87e0..2eee541dfc1 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -698,6 +698,7 @@ struct si_shader_key_ge { uint64_t kill_outputs; /* "get_unique_index" bits */ unsigned kill_clip_distances : 8; unsigned kill_pointsize : 1; + unsigned remove_streamout : 1; /* For NGG VS and TES. */ unsigned ngg_culling : 13; /* SI_NGG_CULL_* */ @@ -1045,6 +1046,13 @@ static inline bool gfx10_ngg_writes_user_edgeflags(struct si_shader *shader) shader->selector->info.writes_edgeflag; } +static inline bool si_shader_uses_streamout(struct si_shader *shader) +{ + return shader->selector->stage <= MESA_SHADER_GEOMETRY && + shader->selector->info.enabled_streamout_buffer_mask && + !shader->key.ge.opt.remove_streamout; +} + #ifdef __cplusplus } #endif diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index c383d6dbd78..245ca5c4638 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -189,7 +189,7 @@ void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTy } if (ctx->stage <= MESA_SHADER_GEOMETRY && ctx->shader->key.ge.as_ngg && - ctx->shader->selector->info.enabled_streamout_buffer_mask) + si_shader_uses_streamout(ctx->shader)) ac_llvm_add_target_dep_function_attr(ctx->main_fn, "amdgpu-gds-size", 256); ac_llvm_set_workgroup_size(ctx->main_fn, max_workgroup_size); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 8e8e364098f..fc2bbb727f9 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -1554,7 +1554,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader } shader->ctx_reg.ngg.vgt_stages.u.ngg = 1; - shader->ctx_reg.ngg.vgt_stages.u.streamout = !!gs_sel->info.enabled_streamout_buffer_mask; + shader->ctx_reg.ngg.vgt_stages.u.streamout = si_shader_uses_streamout(shader); shader->ctx_reg.ngg.vgt_stages.u.ngg_passthrough = gfx10_is_ngg_passthrough(shader); shader->ctx_reg.ngg.vgt_stages.u.gs_wave32 = shader->wave_size == 32; } @@ -1745,12 +1745,12 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader, if (sscreen->info.gfx_level <= GFX9) rsrc1 |= S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8); - if (!sscreen->use_ngg_streamout) { + if (!sscreen->use_ngg_streamout && si_shader_uses_streamout(shader)) { rsrc2 |= S_00B12C_SO_BASE0_EN(!!shader->selector->info.base.xfb_stride[0]) | S_00B12C_SO_BASE1_EN(!!shader->selector->info.base.xfb_stride[1]) | S_00B12C_SO_BASE2_EN(!!shader->selector->info.base.xfb_stride[2]) | S_00B12C_SO_BASE3_EN(!!shader->selector->info.base.xfb_stride[3]) | - S_00B12C_SO_EN(!!info->enabled_streamout_buffer_mask); + S_00B12C_SO_EN(1); } si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS, rsrc1); @@ -2216,6 +2216,8 @@ static void si_get_vs_key_outputs(struct si_context *sctx, struct si_shader_sele key->ge.opt.kill_pointsize = vs->info.writes_psize && sctx->current_rast_prim != PIPE_PRIM_POINTS && !sctx->queued.named.rasterizer->polygon_mode_is_points; + key->ge.opt.remove_streamout = vs->info.enabled_streamout_buffer_mask && + !sctx->streamout.enabled_mask; } static void si_clear_vs_key_outputs(struct si_context *sctx, struct si_shader_selector *vs, @@ -2223,6 +2225,7 @@ static void si_clear_vs_key_outputs(struct si_context *sctx, struct si_shader_se { key->ge.opt.kill_clip_distances = 0; key->ge.opt.kill_outputs = 0; + key->ge.opt.remove_streamout = 0; key->ge.opt.ngg_culling = 0; key->ge.mono.u.vs_export_prim_id = 0; key->ge.opt.kill_pointsize = 0; diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c index 52cb4eccc1f..230dcf00bf9 100644 --- a/src/gallium/drivers/radeonsi/si_state_streamout.c +++ b/src/gallium/drivers/radeonsi/si_state_streamout.c @@ -169,7 +169,11 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ for (; i < sctx->streamout.num_targets; i++) si_so_target_reference(&sctx->streamout.targets[i], NULL); - sctx->streamout.enabled_mask = enabled_mask; + if (!!sctx->streamout.enabled_mask != !!enabled_mask) { + sctx->streamout.enabled_mask = enabled_mask; + sctx->do_update_shaders = true; /* to keep/remove streamout shader code as an optimization */ + } + sctx->streamout.num_targets = num_targets; sctx->streamout.append_bitmask = append_bitmask; |