summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Marek Olšák <marek.olsak@amd.com> 2020-05-02 17:58:15 -0400
committer: Marge Bot <eric+marge@anholt.net> 2020-05-05 16:27:29 +0000
commit: 0d83e7f4b9887346e9b7b4d44c068d340aa04f28 (patch)
tree: 980792f85fb321a09f7d05ed6c71e04ebaea74b3
parent: 39571d384e02848aff8c8fe635ff4b93d740aab3 (diff)
radeonsi: enable TC-compatible HTILE on demand for best Z/S performance
I haven't measured this, but it can only help.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4866>
-rw-r--r-- src/gallium/drivers/radeonsi/si_blit.c 11
-rw-r--r-- src/gallium/drivers/radeonsi/si_clear.c 37
-rw-r--r-- src/gallium/drivers/radeonsi/si_compute_blit.c 6
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.h 2
-rw-r--r-- src/gallium/drivers/radeonsi/si_texture.c 5
5 files changed, 57 insertions, 4 deletions
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 057cdc6ce31..12bea9eba31 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -359,6 +359,17 @@ static void si_decompress_depth(struct si_context *sctx, struct si_texture *tex,
tex->stencil_dirty_level_mask &= ~levels_s;
}
+ /* We just had to completely decompress Z/S for texturing. Enable
+ * TC-compatible HTILE on the next clear, so that the decompression
+ * doesn't have to be done for this texture ever again.
+ *
+ * TC-compatible HTILE might slightly reduce Z/S performance, but
+ * the decompression is much worse.
+ */
+ if (has_htile && !tc_compat_htile &&
+ tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE)
+ tex->enable_tc_compatible_htile_next_clear = true;
+
/* Only in-place decompression needs to flush DB caches, or
* when we don't decompress but TC-compatible planes are dirty.
*/
diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c
index 12512e7df73..096680ccdac 100644
--- a/src/gallium/drivers/radeonsi/si_clear.c
+++ b/src/gallium/drivers/radeonsi/si_clear.c
@@ -570,6 +570,43 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
if (zstex && zsbuf->u.tex.first_layer == 0 &&
zsbuf->u.tex.last_layer == util_max_layer(&zstex->buffer.b.b, 0)) {
+ /* See whether we should enable TC-compatible HTILE. */
+ if (zstex->enable_tc_compatible_htile_next_clear &&
+ !zstex->tc_compatible_htile &&
+ si_htile_enabled(zstex, zsbuf->u.tex.level, PIPE_MASK_ZS) &&
+ /* If both depth and stencil are present, they must be cleared together. */
+ ((buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL ||
+ (buffers & PIPE_CLEAR_DEPTH && (!zstex->surface.has_stencil ||
+ zstex->htile_stencil_disabled)))) {
+ /* Enable TC-compatible HTILE. */
+ zstex->enable_tc_compatible_htile_next_clear = false;
+ zstex->tc_compatible_htile = true;
+
+ /* Update the framebuffer state to reflect the change. */
+ sctx->framebuffer.DB_has_shader_readable_metadata = true;
+ sctx->framebuffer.dirty_zsbuf = true;
+ si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
+
+ /* Update all sampler views and shader images in all contexts. */
+ p_atomic_inc(&sctx->screen->dirty_tex_counter);
+
+ /* Re-initialize HTILE, so that it doesn't contain values incompatible
+ * with the new TC-compatible HTILE setting.
+ *
+ * 0xfffff30f = uncompressed Z + S
+ * 0xfffc000f = uncompressed Z only
+ *
+ * GFX8 always uses the Z+S HTILE format for TC-compatible HTILE even
+ * when stencil is not present.
+ */
+ uint32_t clear_value = (zstex->surface.has_stencil &&
+ !zstex->htile_stencil_disabled) ||
+ sctx->chip_class == GFX8 ? 0xfffff30f : 0xfffc000f;
+ si_clear_buffer(sctx, &zstex->buffer.b.b, zstex->surface.htile_offset,
+ zstex->surface.htile_size, &clear_value, 4,
+ SI_COHERENCY_DB_META, false);
+ }
+
/* TC-compatible HTILE only supports depth clears to 0 or 1. */
if (buffers & PIPE_CLEAR_DEPTH && si_htile_enabled(zstex, zsbuf->u.tex.level, PIPE_MASK_Z) &&
(!zstex->tc_compatible_htile || depth == 0 || depth == 1)) {
diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c
index 8772475d124..53bc34df508 100644
--- a/src/gallium/drivers/radeonsi/si_compute_blit.c
+++ b/src/gallium/drivers/radeonsi/si_compute_blit.c
@@ -33,7 +33,9 @@
static enum si_cache_policy get_cache_policy(struct si_context *sctx, enum si_coherency coher,
uint64_t size)
{
- if ((sctx->chip_class >= GFX9 && (coher == SI_COHERENCY_CB_META || coher == SI_COHERENCY_CP)) ||
+ if ((sctx->chip_class >= GFX9 && (coher == SI_COHERENCY_CB_META ||
+ coher == SI_COHERENCY_DB_META ||
+ coher == SI_COHERENCY_CP)) ||
(sctx->chip_class >= GFX7 && coher == SI_COHERENCY_SHADER))
return size <= 256 * 1024 ? L2_LRU : L2_STREAM;
@@ -53,6 +55,8 @@ unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher,
(cache_policy == L2_BYPASS ? SI_CONTEXT_INV_L2 : 0);
case SI_COHERENCY_CB_META:
return SI_CONTEXT_FLUSH_AND_INV_CB;
+ case SI_COHERENCY_DB_META:
+ return SI_CONTEXT_FLUSH_AND_INV_DB;
}
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 6b934c1dff4..a5dbf655928 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -239,6 +239,7 @@ enum si_coherency
SI_COHERENCY_NONE, /* no cache flushes needed */
SI_COHERENCY_SHADER,
SI_COHERENCY_CB_META,
+ SI_COHERENCY_DB_META,
SI_COHERENCY_CP,
};
@@ -336,6 +337,7 @@ struct si_texture {
uint8_t stencil_clear_value;
bool fmask_is_identity : 1;
bool tc_compatible_htile : 1;
+ bool enable_tc_compatible_htile_next_clear : 1;
bool htile_stencil_disabled : 1;
bool depth_cleared : 1; /* if it was cleared at least once */
bool stencil_cleared : 1; /* if it was cleared at least once */
diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c
index 9914b214864..1c2c170c97e 100644
--- a/src/gallium/drivers/radeonsi/si_texture.c
+++ b/src/gallium/drivers/radeonsi/si_texture.c
@@ -1207,13 +1207,12 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
/* don't include stencil-only formats which we don't support for rendering */
tex->is_depth = util_format_has_depth(util_format_description(tex->buffer.b.b.format));
tex->surface = *surface;
- tex->tc_compatible_htile =
- tex->surface.htile_size != 0 && (tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE);
+ tex->tc_compatible_htile = false; /* This will be enabled on demand. */
/* TC-compatible HTILE:
* - GFX8 only supports Z32_FLOAT.
* - GFX9 only supports Z32_FLOAT and Z16_UNORM. */
- if (tex->tc_compatible_htile) {
+ if (tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE) {
if (sscreen->info.chip_class >= GFX9 && base->format == PIPE_FORMAT_Z16_UNORM)
tex->db_render_format = base->format;
else {