diff options
author | Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> | 2021-01-26 17:46:44 +0100 |
---|---|---|
committer | Marge Bot <eric+marge@anholt.net> | 2021-02-17 09:11:46 +0000 |
commit | bddc0e023c2c87d3248691ea62b77626704cc5a4 (patch) | |
tree | e6513e21118af5e2c6e0895a29f4394d5bfb6330 /src | |
parent | a8373b3d3876afa960ead3378adacc43afcec6ed (diff) |
radeonsi: fix read from compute / write from draw sync
A compute dispatch should see the result of a previous draw command.
radeonsi was missing this implicit sync, causing rendering artifacts:
the compute shader was reading from a texture still being written to
by the previous draw.
Framebuffer BOs are marked with RADEON_USAGE_NEEDS_IMPLICIT_SYNC,
so compute jobs will sync.
v2: use RADEON_USAGE_NEEDS_IMPLICIT_SYNC
v3: unconditionally make CB coherent after a flush
Reviewed-by: Zoltán Böszörményi <zboszor@gmail.com> (v3)
Reviewed-by: Marek Olšák <marek.olsak@amd.com> (v3)
Cc: mesa-stable
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/4032
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/2878
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/1336
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8869>
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/radeon/radeon_winsys.h | 7 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_compute.c | 46 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_gfx_cs.c | 7 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.c | 8 |
5 files changed, 67 insertions, 4 deletions
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index 9f83c6ad770..44dbf532a10 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -98,7 +98,12 @@ enum radeon_bo_usage /* The winsys ensures that the CS submission will be scheduled after * previously flushed CSs referencing this BO in a conflicting way. */ - RADEON_USAGE_SYNCHRONIZED = 8 + RADEON_USAGE_SYNCHRONIZED = 8, + + /* When used, an implicit sync is done to make sure a compute shader + * will read the written values from a previous draw. + */ + RADEON_USAGE_NEEDS_IMPLICIT_SYNC = 16, }; enum radeon_map_flags diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 0f1ece6965d..e2752368439 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -823,6 +823,47 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_ radeon_end(); } +static bool si_check_needs_implicit_sync(struct si_context *sctx) +{ + /* If the compute shader is going to read from a texture/image written by a + * previous draw, we must wait for its completion before continuing. + * Buffers and image stores (from the draw) are not taken into consideration + * because that's the app responsibility. + * + * The OpenGL 4.6 spec says: + * + * buffer object and texture stores performed by shaders are not + * automatically synchronized + */ + struct si_shader_info *info = &sctx->cs_shader_state.program->sel.info; + struct si_samplers *samplers = &sctx->samplers[PIPE_SHADER_COMPUTE]; + unsigned mask = samplers->enabled_mask & info->base.textures_used; + + while (mask) { + int i = u_bit_scan(&mask); + struct si_sampler_view *sview = (struct si_sampler_view *)samplers->views[i]; + + struct si_resource *res = si_resource(sview->base.texture); + if (sctx->ws->cs_is_buffer_referenced(&sctx->gfx_cs, res->buf, + RADEON_USAGE_NEEDS_IMPLICIT_SYNC)) + return true; + } + + struct si_images *images = &sctx->images[PIPE_SHADER_COMPUTE]; + mask = u_bit_consecutive(0, info->base.num_images); + + while (mask) { + int i = u_bit_scan(&mask); + struct pipe_image_view *sview = &images->views[i]; + + struct si_resource *res = si_resource(sview->resource); + if (sctx->ws->cs_is_buffer_referenced(&sctx->gfx_cs, res->buf, + RADEON_USAGE_NEEDS_IMPLICIT_SYNC)) + return true; + } + return false; +} + static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *info) { struct si_context *sctx = (struct si_context *)ctx; @@ -849,6 +890,11 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info if (sctx->last_num_draw_calls != sctx->num_draw_calls) { si_update_fb_dirtiness_after_rendering(sctx); sctx->last_num_draw_calls = sctx->num_draw_calls; + + if (sctx->force_cb_shader_coherent || si_check_needs_implicit_sync(sctx)) + si_make_CB_shader_coherent(sctx, 0, + sctx->framebuffer.CB_has_shader_readable_metadata, + sctx->framebuffer.all_DCC_pipe_aligned); } si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE); diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index a63e7db6deb..200dceb6003 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -551,6 +551,13 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs) ctx->index_ring_base = ctx->index_ring_size_per_ib; ctx->index_ring_offset = 0; + + /* All buffer references are removed on a flush, so si_check_needs_implicit_sync + * cannot determine if si_make_CB_shader_coherent() needs to be called. + * ctx->force_cb_shader_coherent will be cleared by the first call to + * si_make_CB_shader_coherent. + */ + ctx->force_cb_shader_coherent = true; } void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs, unsigned cp_coher_cntl) diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 27da51eaff8..2aa13948fba 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1266,6 +1266,8 @@ struct si_context { struct list_head shader_query_buffers; unsigned num_active_shader_queries; + bool force_cb_shader_coherent; + /* Statistics gathering for the DCC enablement heuristic. It can't be * in si_texture because si_texture can be shared by multiple * contexts. This is for back buffers only. We shouldn't get too many @@ -1745,6 +1747,7 @@ static inline void si_make_CB_shader_coherent(struct si_context *sctx, unsigned bool shaders_read_metadata, bool dcc_pipe_aligned) { sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_INV_VCACHE; + sctx->force_cb_shader_coherent = false; if (sctx->chip_class >= GFX10) { if (sctx->screen->info.tcc_harvested) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index fb90fc80511..95f5a9834ee 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2955,17 +2955,19 @@ static void si_emit_framebuffer_state(struct si_context *sctx) tex = (struct si_texture *)cb->base.texture; radeon_add_to_buffer_list( - sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE, + sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC, tex->buffer.b.b.nr_samples > 1 ? RADEON_PRIO_COLOR_BUFFER_MSAA : RADEON_PRIO_COLOR_BUFFER); if (tex->cmask_buffer && tex->cmask_buffer != &tex->buffer) { - radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, tex->cmask_buffer, RADEON_USAGE_READWRITE, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, tex->cmask_buffer, + RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC, RADEON_PRIO_SEPARATE_META); } if (tex->dcc_separate_buffer) radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, tex->dcc_separate_buffer, - RADEON_USAGE_READWRITE, RADEON_PRIO_SEPARATE_META); + RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC, + RADEON_PRIO_SEPARATE_META); /* Compute mutable surface parameters. */ cb_color_base = tex->buffer.gpu_address >> 8; |