From 0d83e7f4b9887346e9b7b4d44c068d340aa04f28 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 2 May 2020 17:58:15 -0400 Subject: radeonsi: enable TC-compatible HTILE on demand for best Z/S performance I haven't measured this, but it can only help. Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_blit.c | 11 ++++++++ src/gallium/drivers/radeonsi/si_clear.c | 37 ++++++++++++++++++++++++++ src/gallium/drivers/radeonsi/si_compute_blit.c | 6 ++++- src/gallium/drivers/radeonsi/si_pipe.h | 2 ++ src/gallium/drivers/radeonsi/si_texture.c | 5 ++-- 5 files changed, 57 insertions(+), 4 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 057cdc6ce31..12bea9eba31 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -359,6 +359,17 @@ static void si_decompress_depth(struct si_context *sctx, struct si_texture *tex, tex->stencil_dirty_level_mask &= ~levels_s; } + /* We just had to completely decompress Z/S for texturing. Enable + * TC-compatible HTILE on the next clear, so that the decompression + * doesn't have to be done for this texture ever again. + * + * TC-compatible HTILE might slightly reduce Z/S performance, but + * the decompression is much worse. + */ + if (has_htile && !tc_compat_htile && + tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE) + tex->enable_tc_compatible_htile_next_clear = true; + /* Only in-place decompression needs to flush DB caches, or * when we don't decompress but TC-compatible planes are dirty. */ diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c index 12512e7df73..096680ccdac 100644 --- a/src/gallium/drivers/radeonsi/si_clear.c +++ b/src/gallium/drivers/radeonsi/si_clear.c @@ -570,6 +570,43 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers, if (zstex && zsbuf->u.tex.first_layer == 0 && zsbuf->u.tex.last_layer == util_max_layer(&zstex->buffer.b.b, 0)) { + /* See whether we should enable TC-compatible HTILE. */ + if (zstex->enable_tc_compatible_htile_next_clear && + !zstex->tc_compatible_htile && + si_htile_enabled(zstex, zsbuf->u.tex.level, PIPE_MASK_ZS) && + /* If both depth and stencil are present, they must be cleared together. */ + ((buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL || + (buffers & PIPE_CLEAR_DEPTH && (!zstex->surface.has_stencil || + zstex->htile_stencil_disabled)))) { + /* Enable TC-compatible HTILE. */ + zstex->enable_tc_compatible_htile_next_clear = false; + zstex->tc_compatible_htile = true; + + /* Update the framebuffer state to reflect the change. */ + sctx->framebuffer.DB_has_shader_readable_metadata = true; + sctx->framebuffer.dirty_zsbuf = true; + si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer); + + /* Update all sampler views and shader images in all contexts. */ + p_atomic_inc(&sctx->screen->dirty_tex_counter); + + /* Re-initialize HTILE, so that it doesn't contain values incompatible + * with the new TC-compatible HTILE setting. + * + * 0xfffff30f = uncompressed Z + S + * 0xfffc000f = uncompressed Z only + * + * GFX8 always uses the Z+S HTILE format for TC-compatible HTILE even + * when stencil is not present. + */ + uint32_t clear_value = (zstex->surface.has_stencil && + !zstex->htile_stencil_disabled) || + sctx->chip_class == GFX8 ? 0xfffff30f : 0xfffc000f; + si_clear_buffer(sctx, &zstex->buffer.b.b, zstex->surface.htile_offset, + zstex->surface.htile_size, &clear_value, 4, + SI_COHERENCY_DB_META, false); + } + /* TC-compatible HTILE only supports depth clears to 0 or 1. */ if (buffers & PIPE_CLEAR_DEPTH && si_htile_enabled(zstex, zsbuf->u.tex.level, PIPE_MASK_Z) && (!zstex->tc_compatible_htile || depth == 0 || depth == 1)) { diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index 8772475d124..53bc34df508 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -33,7 +33,9 @@ static enum si_cache_policy get_cache_policy(struct si_context *sctx, enum si_coherency coher, uint64_t size) { - if ((sctx->chip_class >= GFX9 && (coher == SI_COHERENCY_CB_META || coher == SI_COHERENCY_CP)) || + if ((sctx->chip_class >= GFX9 && (coher == SI_COHERENCY_CB_META || + coher == SI_COHERENCY_DB_META || + coher == SI_COHERENCY_CP)) || (sctx->chip_class >= GFX7 && coher == SI_COHERENCY_SHADER)) return size <= 256 * 1024 ? L2_LRU : L2_STREAM; @@ -53,6 +55,8 @@ unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher, (cache_policy == L2_BYPASS ? SI_CONTEXT_INV_L2 : 0); case SI_COHERENCY_CB_META: return SI_CONTEXT_FLUSH_AND_INV_CB; + case SI_COHERENCY_DB_META: + return SI_CONTEXT_FLUSH_AND_INV_DB; } } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 6b934c1dff4..a5dbf655928 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -239,6 +239,7 @@ enum si_coherency SI_COHERENCY_NONE, /* no cache flushes needed */ SI_COHERENCY_SHADER, SI_COHERENCY_CB_META, + SI_COHERENCY_DB_META, SI_COHERENCY_CP, }; @@ -336,6 +337,7 @@ struct si_texture { uint8_t stencil_clear_value; bool fmask_is_identity : 1; bool tc_compatible_htile : 1; + bool enable_tc_compatible_htile_next_clear : 1; bool htile_stencil_disabled : 1; bool depth_cleared : 1; /* if it was cleared at least once */ bool stencil_cleared : 1; /* if it was cleared at least once */ diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index 9914b214864..1c2c170c97e 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -1207,13 +1207,12 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, /* don't include stencil-only formats which we don't support for rendering */ tex->is_depth = util_format_has_depth(util_format_description(tex->buffer.b.b.format)); tex->surface = *surface; - tex->tc_compatible_htile = - tex->surface.htile_size != 0 && (tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE); + tex->tc_compatible_htile = false; /* This will be enabled on demand. */ /* TC-compatible HTILE: * - GFX8 only supports Z32_FLOAT. * - GFX9 only supports Z32_FLOAT and Z16_UNORM. */ - if (tex->tc_compatible_htile) { + if (tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE) { if (sscreen->info.chip_class >= GFX9 && base->format == PIPE_FORMAT_Z16_UNORM) tex->db_render_format = base->format; else { -- cgit v1.2.3