diff options
author | Marek Olšák <marek.olsak@amd.com> | 2021-01-11 15:14:40 -0500 |
---|---|---|
committer | Marek Olšák <marek.olsak@amd.com> | 2021-01-30 15:41:23 -0500 |
commit | 47587758f21019229be5faf9417e7dc5b9ea732d (patch) | |
tree | 2d426fbbfe760e492be4a27dfae7de0d926f992a | |
parent | d5b529ad1486dd6a31fe394ad0e4b18bf3a79d6b (diff) |
radeonsi: prefetch VB descriptors right after uploading
This skips the logic that sets and checks prefetch_L2_mask.
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8794>
-rw-r--r-- | src/gallium/drivers/radeonsi/si_gfx_cs.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_draw.cpp | 49 |
3 files changed, 16 insertions, 36 deletions
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index 01bce3ffe28..a63e7db6deb 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -445,8 +445,6 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs) ctx->prefetch_L2_mask |= SI_PREFETCH_VS; if (ctx->queued.named.ps) ctx->prefetch_L2_mask |= SI_PREFETCH_PS; - if (ctx->vb_descriptors_buffer) - ctx->prefetch_L2_mask |= SI_PREFETCH_VBO_DESCRIPTORS; /* CLEAR_STATE disables all colorbuffers, so only enable bound ones. */ bool has_clear_state = ctx->screen->info.has_clear_state; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 6c9d8130d73..fd161034cb1 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -99,7 +99,6 @@ extern "C" { #define SI_CONTEXT_VGT_FLUSH (1 << 15) #define SI_CONTEXT_VGT_STREAMOUT_SYNC (1 << 16) -#define SI_PREFETCH_VBO_DESCRIPTORS (1 << 0) #define SI_PREFETCH_LS (1 << 1) #define SI_PREFETCH_HS (1 << 2) #define SI_PREFETCH_ES (1 << 3) diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index fb4f721dbc4..2bb7035ffc7 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -66,15 +66,6 @@ static void si_prefetch_shader_async(struct si_context *sctx, struct si_pm4_stat si_cp_dma_prefetch(sctx, bo, 0, bo->width0); } -static void si_prefetch_VBO_descriptors(struct si_context *sctx) -{ - if (!sctx->vertex_elements->vb_desc_list_alloc_size) - return; - - si_cp_dma_prefetch(sctx, &sctx->vb_descriptors_buffer->b.b, sctx->vb_descriptors_offset, - sctx->vertex_elements->vb_desc_list_alloc_size); -} - enum si_L2_prefetch_mode { PREFETCH_BEFORE_DRAW = 1, PREFETCH_AFTER_DRAW, @@ -82,11 +73,11 @@ enum si_L2_prefetch_mode { }; /** - * Prefetch shaders and VBO descriptors. + * Prefetch shaders. */ template<chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG, si_L2_prefetch_mode mode> -static void si_emit_prefetch_L2(struct si_context *sctx) +static void si_prefetch_shaders(struct si_context *sctx) { unsigned mask = sctx->prefetch_L2_mask; @@ -101,8 +92,6 @@ static void si_emit_prefetch_L2(struct si_context *sctx) if (mode != PREFETCH_AFTER_DRAW) { if (mask & SI_PREFETCH_HS) si_prefetch_shader_async(sctx, sctx->queued.named.hs); - if (mask & SI_PREFETCH_VBO_DESCRIPTORS) - si_prefetch_VBO_descriptors(sctx); if (mode == PREFETCH_BEFORE_DRAW) return; @@ -116,8 +105,6 @@ static void si_emit_prefetch_L2(struct si_context *sctx) if (mode != PREFETCH_AFTER_DRAW) { if (mask & SI_PREFETCH_GS) si_prefetch_shader_async(sctx, sctx->queued.named.gs); - if (mask & SI_PREFETCH_VBO_DESCRIPTORS) - si_prefetch_VBO_descriptors(sctx); if (mode == PREFETCH_BEFORE_DRAW) return; @@ -129,8 +116,6 @@ static void si_emit_prefetch_L2(struct si_context *sctx) if (mode != PREFETCH_AFTER_DRAW) { if (mask & SI_PREFETCH_VS) si_prefetch_shader_async(sctx, sctx->queued.named.vs); - if (mask & SI_PREFETCH_VBO_DESCRIPTORS) - si_prefetch_VBO_descriptors(sctx); if (mode == PREFETCH_BEFORE_DRAW) return; @@ -143,8 +128,6 @@ static void si_emit_prefetch_L2(struct si_context *sctx) if (mode != PREFETCH_AFTER_DRAW) { if (mask & SI_PREFETCH_LS) si_prefetch_shader_async(sctx, sctx->queued.named.ls); - if (mask & SI_PREFETCH_VBO_DESCRIPTORS) - si_prefetch_VBO_descriptors(sctx); if (mode == PREFETCH_BEFORE_DRAW) return; @@ -162,8 +145,6 @@ static void si_emit_prefetch_L2(struct si_context *sctx) if (mode != PREFETCH_AFTER_DRAW) { if (mask & SI_PREFETCH_ES) si_prefetch_shader_async(sctx, sctx->queued.named.es); - if (mask & SI_PREFETCH_VBO_DESCRIPTORS) - si_prefetch_VBO_descriptors(sctx); if (mode == PREFETCH_BEFORE_DRAW) return; @@ -177,8 +158,6 @@ static void si_emit_prefetch_L2(struct si_context *sctx) if (mode != PREFETCH_AFTER_DRAW) { if (mask & SI_PREFETCH_VS) si_prefetch_shader_async(sctx, sctx->queued.named.vs); - if (mask & SI_PREFETCH_VBO_DESCRIPTORS) - si_prefetch_VBO_descriptors(sctx); if (mode == PREFETCH_BEFORE_DRAW) return; @@ -1331,7 +1310,7 @@ void si_prim_discard_signal_next_compute_ib_start(struct si_context *sctx) } template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG> ALWAYS_INLINE -static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) +static bool si_upload_and_prefetch_VB_descriptors(struct si_context *sctx) { unsigned count = sctx->num_vertex_elements; bool pointer_dirty, user_sgprs_dirty; @@ -1362,10 +1341,10 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) sctx->vb_descriptors_gpu_list = ptr; radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->vb_descriptors_buffer, RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); - sctx->prefetch_L2_mask |= SI_PREFETCH_VBO_DESCRIPTORS; + si_cp_dma_prefetch(sctx, &sctx->vb_descriptors_buffer->b.b, sctx->vb_descriptors_offset, + alloc_size); } else { si_resource_reference(&sctx->vb_descriptors_buffer, NULL); - sctx->prefetch_L2_mask &= ~SI_PREFETCH_VBO_DESCRIPTORS; } unsigned first_vb_use_mask = velems->first_vb_use_mask; @@ -2142,8 +2121,10 @@ static void si_draw_vbo(struct pipe_context *ctx, sctx->emit_cache_flush(sctx, &sctx->gfx_cs); /* <-- CUs are idle here. */ - /* This uploads VBO descriptors and sets user SGPRs. */ - if (unlikely((!si_upload_vertex_buffer_descriptors<GFX_VERSION, HAS_TESS, HAS_GS, NGG>(sctx)))) { + /* This uploads VBO descriptors, sets user SGPRs, and executes the L2 prefetch. + * It should done after cache flushing. + */ + if (unlikely((!si_upload_and_prefetch_VB_descriptors<GFX_VERSION, HAS_TESS, HAS_GS, NGG>(sctx)))) { DRAW_CLEANUP; return; } @@ -2170,7 +2151,7 @@ static void si_draw_vbo(struct pipe_context *ctx, /* Start prefetches after the draw has been started. Both will run * in parallel, but starting the draw first is more important. */ - si_emit_prefetch_L2<GFX_VERSION, HAS_TESS, HAS_GS, NGG, PREFETCH_ALL>(sctx); + si_prefetch_shaders<GFX_VERSION, HAS_TESS, HAS_GS, NGG, PREFETCH_ALL>(sctx); } else { /* If we don't wait for idle, start prefetches first, then set * states, and draw at the end. @@ -2179,10 +2160,12 @@ static void si_draw_vbo(struct pipe_context *ctx, sctx->emit_cache_flush(sctx, &sctx->gfx_cs); /* Only prefetch the API VS and VBO descriptors. */ - si_emit_prefetch_L2<GFX_VERSION, HAS_TESS, HAS_GS, NGG, PREFETCH_BEFORE_DRAW>(sctx); + si_prefetch_shaders<GFX_VERSION, HAS_TESS, HAS_GS, NGG, PREFETCH_BEFORE_DRAW>(sctx); - /* This uploads VBO descriptors and sets user SGPRs. */ - if (unlikely((!si_upload_vertex_buffer_descriptors<GFX_VERSION, HAS_TESS, HAS_GS, NGG>(sctx)))) { + /* This uploads VBO descriptors, sets user SGPRs, and executes the L2 prefetch. + * It should done after cache flushing and after the VS prefetch. + */ + if (unlikely((!si_upload_and_prefetch_VB_descriptors<GFX_VERSION, HAS_TESS, HAS_GS, NGG>(sctx)))) { DRAW_CLEANUP; return; } @@ -2206,7 +2189,7 @@ static void si_draw_vbo(struct pipe_context *ctx, /* Prefetch the remaining shaders after the draw has been * started. */ - si_emit_prefetch_L2<GFX_VERSION, HAS_TESS, HAS_GS, NGG, PREFETCH_AFTER_DRAW>(sctx); + si_prefetch_shaders<GFX_VERSION, HAS_TESS, HAS_GS, NGG, PREFETCH_AFTER_DRAW>(sctx); } /* Clear the context roll flag after the draw call. |