summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJason Ekstrand <jason.ekstrand@intel.com>2018-02-03 09:12:15 -0800
committerJason Ekstrand <jason@jlekstrand.net>2019-09-06 23:35:09 +0000
commit34541be7b04d76c5589600553995467daca6c30d (patch)
tree99e414562ea7050ededeaac678c541deb730f200
parentd62ca48c31f8dcbc90c6ae9a8e01a668cc9fe68e (diff)
intel/blorp: Use wide formats for nicely aligned stencil clears
In the case where the stencil clear is nicely aligned, we can clear stencil much more efficiently by mapping it as a wide format (say RGBA32_UINT) and blasting out the stencil clear value with a repclear. On Unigine Heaven, this makes one stencil clear go from non-trivial to unnoticeable when looking at per-draw timings. In order for this change to work properly, ANV needs to do a bit more flushing around depth and stencil clears. i965 and iris already have the cache tracking logic to handle this so no changes are required there. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
-rw-r--r--src/intel/blorp/blorp_clear.c108
-rw-r--r--src/intel/vulkan/anv_blorp.c14
2 files changed, 122 insertions, 0 deletions
diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
index ba103c8377e..595bac18e61 100644
--- a/src/intel/blorp/blorp_clear.c
+++ b/src/intel/blorp/blorp_clear.c
@@ -565,6 +565,107 @@ blorp_clear(struct blorp_batch *batch,
}
}
+static bool
+blorp_clear_stencil_as_rgba(struct blorp_batch *batch,
+ const struct blorp_surf *surf,
+ uint32_t level, uint32_t start_layer,
+ uint32_t num_layers,
+ uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1,
+ uint8_t stencil_mask, uint8_t stencil_value)
+{
+ /* We only support separate W-tiled stencil for now */
+ if (surf->surf->format != ISL_FORMAT_R8_UINT ||
+ surf->surf->tiling != ISL_TILING_W)
+ return false;
+
+ /* Stencil mask support would require piles of shader magic */
+ if (stencil_mask != 0xff)
+ return false;
+
+ if (surf->surf->samples > 1) {
+ /* Adjust x0, y0, x1, and y1 to be in units of samples */
+ assert(surf->surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED);
+ struct isl_extent2d msaa_px_size_sa =
+ isl_get_interleaved_msaa_px_size_sa(surf->surf->samples);
+
+ x0 *= msaa_px_size_sa.w;
+ y0 *= msaa_px_size_sa.h;
+ x1 *= msaa_px_size_sa.w;
+ y1 *= msaa_px_size_sa.h;
+ }
+
+ /* W-tiles and Y-tiles have the same layout as far as cache lines are
+ * concerned: both are 8x8 cache lines laid out Y-major. The difference is
+ * entirely in how the data is arranged withing the cache line. W-tiling
+ * is 8x8 pixels in a swizzled pattern while Y-tiling is 16B by 4 rows
+ * regardless of image format size. As long as everything is aligned to 8,
+ * we can just treat the W-tiled image as Y-tiled, ignore the layout
+ * difference within a cache line, and blast out data.
+ */
+ if (x0 % 8 != 0 || y0 % 8 != 0 || x1 % 8 != 0 || y1 % 8 != 0)
+ return false;
+
+ struct blorp_params params;
+ blorp_params_init(&params);
+
+ if (!blorp_params_get_clear_kernel(batch, &params, true, false))
+ return false;
+
+ memset(&params.wm_inputs.clear_color, stencil_value,
+ sizeof(params.wm_inputs.clear_color));
+
+ /* The Sandy Bridge PRM Vol. 4 Pt. 2, section 2.11.2.1.1 has the
+ * following footnote to the format table:
+ *
+ * 128 BPE Formats cannot be Tiled Y when used as render targets
+ *
+ * We have to use RGBA16_UINT on SNB.
+ */
+ enum isl_format wide_format;
+ if (ISL_DEV_GEN(batch->blorp->isl_dev) <= 6) {
+ wide_format = ISL_FORMAT_R16G16B16A16_UINT;
+
+ /* For RGBA16_UINT, we need to mask the stencil value otherwise, we risk
+ * clamping giving us the wrong values
+ */
+ for (unsigned i = 0; i < 4; i++)
+ params.wm_inputs.clear_color[i] &= 0xffff;
+ } else {
+ wide_format = ISL_FORMAT_R32G32B32A32_UINT;
+ }
+
+ for (uint32_t a = 0; a < num_layers; a++) {
+ uint32_t layer = start_layer + a;
+
+ brw_blorp_surface_info_init(batch->blorp, &params.dst, surf, level,
+ layer, ISL_FORMAT_UNSUPPORTED, true);
+
+ if (surf->surf->samples > 1)
+ blorp_surf_fake_interleaved_msaa(batch->blorp->isl_dev, &params.dst);
+
+ /* Make it Y-tiled */
+ blorp_surf_retile_w_to_y(batch->blorp->isl_dev, &params.dst);
+
+ unsigned wide_Bpp =
+ isl_format_get_layout(wide_format)->bpb / 8;
+
+ params.dst.view.format = params.dst.surf.format = wide_format;
+ assert(params.dst.surf.logical_level0_px.width % wide_Bpp == 0);
+ params.dst.surf.logical_level0_px.width /= wide_Bpp;
+ assert(params.dst.tile_x_sa % wide_Bpp == 0);
+ params.dst.tile_x_sa /= wide_Bpp;
+
+ params.x0 = params.dst.tile_x_sa + x0 / (wide_Bpp / 2);
+ params.y0 = params.dst.tile_y_sa + y0 / 2;
+ params.x1 = params.dst.tile_x_sa + x1 / (wide_Bpp / 2);
+ params.y1 = params.dst.tile_y_sa + y1 / 2;
+
+ batch->blorp->exec(batch, &params);
+ }
+
+ return true;
+}
+
void
blorp_clear_depth_stencil(struct blorp_batch *batch,
const struct blorp_surf *depth,
@@ -575,6 +676,13 @@ blorp_clear_depth_stencil(struct blorp_batch *batch,
bool clear_depth, float depth_value,
uint8_t stencil_mask, uint8_t stencil_value)
{
+ if (!clear_depth && blorp_clear_stencil_as_rgba(batch, stencil, level,
+ start_layer, num_layers,
+ x0, y0, x1, y1,
+ stencil_mask,
+ stencil_value))
+ return;
+
struct blorp_params params;
blorp_params_init(&params);
diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 17f0ffad576..2cab98dbe16 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1527,6 +1527,13 @@ anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
ISL_AUX_USAGE_NONE, &stencil);
}
+ /* Blorp may choose to clear stencil using RGBA32_UINT for better
+ * performance. If it does this, we need to flush it out of the depth
+ * cache before rendering to it.
+ */
+ cmd_buffer->state.pending_pipe_bits |=
+ ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
+
blorp_clear_depth_stencil(&batch, &depth, &stencil,
level, base_layer, layer_count,
area.offset.x, area.offset.y,
@@ -1537,6 +1544,13 @@ anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
(aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 0xff : 0,
stencil_value);
+ /* Blorp may choose to clear stencil using RGBA32_UINT for better
+ * performance. If it does this, we need to flush it out of the render
+ * cache before someone starts trying to do stencil on it.
+ */
+ cmd_buffer->state.pending_pipe_bits |=
+ ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
+
struct blorp_surf stencil_shadow;
if ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, image,