author     Jason Ekstrand <jason.ekstrand@collabora.com>  2022-09-02 22:15:48 -0500
committer  Marge Bot <emma+marge@anholt.net>              2022-12-02 09:18:16 +0000
commit     91090e39af0d00d4fc99b7d2bd680bcac9ce0fb9 (patch)
tree       0fab94f64c1104c465083962873ec13635449756 /src/intel
parent     0626b68c88df50e30e61e9fd2ba3e46144ff9ad5 (diff)
hasvk/cmd_buffer: Rip out SKL+ support
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19852>
Diffstat (limited to 'src/intel')
-rw-r--r--  src/intel/vulkan_hasvk/anv_genX.h           7
-rw-r--r--  src/intel/vulkan_hasvk/genX_blorp_exec.c    8
-rw-r--r--  src/intel/vulkan_hasvk/genX_cmd_buffer.c  924
-rw-r--r--  src/intel/vulkan_hasvk/gfx8_cmd_buffer.c  271
4 files changed, 27 insertions(+), 1183 deletions(-)
diff --git a/src/intel/vulkan_hasvk/anv_genX.h b/src/intel/vulkan_hasvk/anv_genX.h
index 2f996ae8a12..27bf53e3487 100644
--- a/src/intel/vulkan_hasvk/anv_genX.h
+++ b/src/intel/vulkan_hasvk/anv_genX.h
@@ -64,9 +64,6 @@ void genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer);
void genX(cmd_buffer_emit_gfx7_depth_flush)(struct anv_cmd_buffer *cmd_buffer);
-void genX(cmd_buffer_emit_gfx12_depth_wa)(struct anv_cmd_buffer *cmd_buffer,
- const struct isl_surf *surf);
-
void genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(struct anv_cmd_buffer *cmd_buffer,
int vb_index,
struct anv_address vb_address,
@@ -75,10 +72,6 @@ void genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush)(struct anv_cmd_buffer *
uint32_t access_type,
uint64_t vb_used);
-void genX(cmd_buffer_emit_hashing_mode)(struct anv_cmd_buffer *cmd_buffer,
- unsigned width, unsigned height,
- unsigned scale);
-
void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer);
void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer);
diff --git a/src/intel/vulkan_hasvk/genX_blorp_exec.c b/src/intel/vulkan_hasvk/genX_blorp_exec.c
index 198edcc83f1..819c3f6d192 100644
--- a/src/intel/vulkan_hasvk/genX_blorp_exec.c
+++ b/src/intel/vulkan_hasvk/genX_blorp_exec.c
@@ -282,10 +282,6 @@ blorp_exec_on_render(struct blorp_batch *batch,
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
assert(cmd_buffer->queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT);
- const unsigned scale = params->fast_clear_op ? UINT_MAX : 1;
- genX(cmd_buffer_emit_hashing_mode)(cmd_buffer, params->x1 - params->x0,
- params->y1 - params->y0, scale);
-
#if GFX_VER >= 11
/* The PIPE_CONTROL command description says:
*
@@ -301,10 +297,6 @@ blorp_exec_on_render(struct blorp_batch *batch,
"before blorp BTI change");
#endif
- if (params->depth.enabled &&
- !(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL))
- genX(cmd_buffer_emit_gfx12_depth_wa)(cmd_buffer, &params->depth.surf);
-
genX(flush_pipeline_select_3d)(cmd_buffer);
/* Apply any outstanding flushes in case pipeline select haven't. */
diff --git a/src/intel/vulkan_hasvk/genX_cmd_buffer.c b/src/intel/vulkan_hasvk/genX_cmd_buffer.c
index dbb4e763004..228893454f3 100644
--- a/src/intel/vulkan_hasvk/genX_cmd_buffer.c
+++ b/src/intel/vulkan_hasvk/genX_cmd_buffer.c
@@ -61,13 +61,6 @@ convert_pc_to_bits(struct GENX(PIPE_CONTROL) *pc) {
enum anv_pipe_bits bits = 0;
bits |= (pc->DepthCacheFlushEnable) ? ANV_PIPE_DEPTH_CACHE_FLUSH_BIT : 0;
bits |= (pc->DCFlushEnable) ? ANV_PIPE_DATA_CACHE_FLUSH_BIT : 0;
-#if GFX_VERx10 >= 125
- bits |= (pc->PSSStallSyncEnable) ? ANV_PIPE_PSS_STALL_SYNC_BIT : 0;
-#endif
-#if GFX_VER >= 12
- bits |= (pc->TileCacheFlushEnable) ? ANV_PIPE_TILE_CACHE_FLUSH_BIT : 0;
- bits |= (pc->HDCPipelineFlushEnable) ? ANV_PIPE_HDC_PIPELINE_FLUSH_BIT : 0;
-#endif
bits |= (pc->RenderTargetCacheFlushEnable) ? ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT : 0;
bits |= (pc->VFCacheInvalidationEnable) ? ANV_PIPE_VF_CACHE_INVALIDATE_BIT : 0;
bits |= (pc->StateCacheInvalidationEnable) ? ANV_PIPE_STATE_CACHE_INVALIDATE_BIT : 0;
@@ -77,9 +70,6 @@ convert_pc_to_bits(struct GENX(PIPE_CONTROL) *pc) {
bits |= (pc->StallAtPixelScoreboard) ? ANV_PIPE_STALL_AT_SCOREBOARD_BIT : 0;
bits |= (pc->DepthStallEnable) ? ANV_PIPE_DEPTH_STALL_BIT : 0;
bits |= (pc->CommandStreamerStallEnable) ? ANV_PIPE_CS_STALL_BIT : 0;
-#if GFX_VERx10 == 125
- bits |= (pc->UntypedDataPortCacheFlushEnable) ? ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT : 0;
-#endif
return bits;
}
@@ -108,19 +98,6 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
*/
cmd_buffer->state.descriptors_dirty |= ~0;
-#if GFX_VERx10 >= 125
- anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
- pc.CommandStreamerStallEnable = true;
- anv_debug_dump_pc(pc);
- }
- anv_batch_emit(
- &cmd_buffer->batch, GENX(3DSTATE_BINDING_TABLE_POOL_ALLOC), btpa) {
- btpa.BindingTablePoolBaseAddress =
- anv_cmd_buffer_surface_base_address(cmd_buffer);
- btpa.BindingTablePoolBufferSize = BINDING_TABLE_POOL_BLOCK_SIZE / 4096;
- btpa.MOCS = mocs;
- }
-#else /* GFX_VERx10 < 125 */
/* Emit a render target cache flush.
*
* This isn't documented anywhere in the PRM. However, it seems to be
@@ -129,26 +106,12 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
* clear depth, reset state base address, and then go render stuff.
*/
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
-#if GFX_VER >= 12
- pc.HDCPipelineFlushEnable = true;
-#else
pc.DCFlushEnable = true;
-#endif
pc.RenderTargetCacheFlushEnable = true;
pc.CommandStreamerStallEnable = true;
anv_debug_dump_pc(pc);
}
-#if GFX_VERx10 == 120
- /* Wa_1607854226:
- *
- * Workaround the non pipelined state not applying in MEDIA/GPGPU pipeline
- * mode by putting the pipeline temporarily in 3D mode.
- */
- uint32_t gfx12_wa_pipeline = cmd_buffer->state.current_pipeline;
- genX(flush_pipeline_select_3d)(cmd_buffer);
-#endif
-
anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS), sba) {
sba.GeneralStateBaseAddress = (struct anv_address) { NULL, 0 };
sba.GeneralStateMOCS = mocs;
@@ -214,35 +177,8 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
(struct anv_address) { .bo = NULL, .offset = 0xfffff000 };
sba.InstructionAccessUpperBoundModifyEnable = true;
# endif
-# if (GFX_VER >= 9)
- sba.BindlessSurfaceStateBaseAddress =
- (struct anv_address) { device->surface_state_pool.block_pool.bo, 0 };
- sba.BindlessSurfaceStateSize = (1 << 20) - 1;
- sba.BindlessSurfaceStateMOCS = mocs;
- sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
-# endif
-# if (GFX_VER >= 10)
- sba.BindlessSamplerStateBaseAddress = (struct anv_address) { NULL, 0 };
- sba.BindlessSamplerStateMOCS = mocs;
- sba.BindlessSamplerStateBaseAddressModifyEnable = true;
- sba.BindlessSamplerStateBufferSize = 0;
-# endif
-#if GFX_VERx10 >= 125
- sba.L1CacheControl = L1CC_WB;
-#endif
}
-#if GFX_VERx10 == 120
- /* Wa_1607854226:
- *
- * Put the pipeline back into its current mode.
- */
- if (gfx12_wa_pipeline != UINT32_MAX)
- genX(flush_pipeline_select)(cmd_buffer, gfx12_wa_pipeline);
-#endif
-
-#endif /* GFX_VERx10 < 125 */
-
/* After re-setting the surface state base address, we have to do some
* cache flushing so that the sampler engine will pick up the new
* SURFACE_STATE objects and binding tables. From the Broadwell PRM,
@@ -290,9 +226,6 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
pc.TextureCacheInvalidationEnable = true;
pc.ConstantCacheInvalidationEnable = true;
pc.StateCacheInvalidationEnable = true;
-#if GFX_VERx10 == 125
- pc.InstructionCacheInvalidateEnable = true;
-#endif
anv_debug_dump_pc(pc);
}
}
@@ -416,8 +349,7 @@ anv_can_fast_clear_color_view(struct anv_device * device,
return false;
/* On Broadwell and earlier, we can only handle 0/1 clear colors */
- if (GFX_VER <= 8 &&
- !isl_color_value_is_zero_one(clear_color, iview->planes[0].isl.format))
+ if (!isl_color_value_is_zero_one(clear_color, iview->planes[0].isl.format))
return false;
/* If the clear color is one that would require non-trivial format
@@ -513,107 +445,6 @@ anv_can_hiz_clear_ds_view(struct anv_device *device,
#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
-#if GFX_VER == 12
-static void
-anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer,
- const struct anv_image *image,
- VkImageAspectFlagBits aspect,
- uint32_t base_level, uint32_t level_count,
- uint32_t base_layer, uint32_t layer_count)
-{
- const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
-
- const struct anv_surface *surface = &image->planes[plane].primary_surface;
- uint64_t base_address =
- anv_address_physical(anv_image_address(image, &surface->memory_range));
-
- const struct isl_surf *isl_surf = &image->planes[plane].primary_surface.isl;
- uint64_t format_bits = intel_aux_map_format_bits_for_isl_surf(isl_surf);
-
- /* We're about to live-update the AUX-TT. We really don't want anyone else
- * trying to read it while we're doing this. We could probably get away
- * with not having this stall in some cases if we were really careful but
- * it's better to play it safe. Full stall the GPU.
- */
- anv_add_pending_pipe_bits(cmd_buffer,
- ANV_PIPE_END_OF_PIPE_SYNC_BIT,
- "before update AUX-TT");
- genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
-
- struct mi_builder b;
- mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
-
- for (uint32_t a = 0; a < layer_count; a++) {
- const uint32_t layer = base_layer + a;
-
- uint64_t start_offset_B = UINT64_MAX, end_offset_B = 0;
- for (uint32_t l = 0; l < level_count; l++) {
- const uint32_t level = base_level + l;
-
- uint32_t logical_array_layer, logical_z_offset_px;
- if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
- logical_array_layer = 0;
-
- /* If the given miplevel does not have this layer, then any higher
- * miplevels won't either because miplevels only get smaller the
- * higher the LOD.
- */
- assert(layer < image->vk.extent.depth);
- if (layer >= anv_minify(image->vk.extent.depth, level))
- break;
- logical_z_offset_px = layer;
- } else {
- assert(layer < image->vk.array_layers);
- logical_array_layer = layer;
- logical_z_offset_px = 0;
- }
-
- uint64_t slice_start_offset_B, slice_end_offset_B;
- isl_surf_get_image_range_B_tile(isl_surf, level,
- logical_array_layer,
- logical_z_offset_px,
- &slice_start_offset_B,
- &slice_end_offset_B);
-
- start_offset_B = MIN2(start_offset_B, slice_start_offset_B);
- end_offset_B = MAX2(end_offset_B, slice_end_offset_B);
- }
-
- /* Aux operates 64K at a time */
- start_offset_B = align_down_u64(start_offset_B, 64 * 1024);
- end_offset_B = align_u64(end_offset_B, 64 * 1024);
-
- for (uint64_t offset = start_offset_B;
- offset < end_offset_B; offset += 64 * 1024) {
- uint64_t address = base_address + offset;
-
- uint64_t aux_entry_addr64, *aux_entry_map;
- aux_entry_map = intel_aux_map_get_entry(cmd_buffer->device->aux_map_ctx,
- address, &aux_entry_addr64);
-
- assert(!anv_use_relocations(cmd_buffer->device->physical));
- struct anv_address aux_entry_address = {
- .bo = NULL,
- .offset = aux_entry_addr64,
- };
-
- const uint64_t old_aux_entry = READ_ONCE(*aux_entry_map);
- uint64_t new_aux_entry =
- (old_aux_entry & INTEL_AUX_MAP_ADDRESS_MASK) | format_bits;
-
- if (isl_aux_usage_has_ccs(image->planes[plane].aux_usage))
- new_aux_entry |= INTEL_AUX_MAP_ENTRY_VALID_BIT;
-
- mi_store(&b, mi_mem64(aux_entry_address), mi_imm(new_aux_entry));
- }
- }
-
- anv_add_pending_pipe_bits(cmd_buffer,
- ANV_PIPE_AUX_TABLE_INVALIDATE_BIT,
- "after update AUX-TT");
-}
-#endif /* GFX_VER == 12 */
-
/* Transitions a HiZ-enabled depth buffer from one layout to another. Unless
* the initial layout is undefined, the HiZ buffer and depth buffer will
* represent the same data at the end of this operation.
@@ -631,16 +462,6 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
if (image->planes[depth_plane].aux_usage == ISL_AUX_USAGE_NONE)
return;
-#if GFX_VER == 12
- if ((initial_layout == VK_IMAGE_LAYOUT_UNDEFINED ||
- initial_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) &&
- cmd_buffer->device->physical->has_implicit_ccs &&
- cmd_buffer->device->info->has_aux_map) {
- anv_image_init_aux_tt(cmd_buffer, image, VK_IMAGE_ASPECT_DEPTH_BIT,
- 0, 1, base_layer, layer_count);
- }
-#endif
-
/* If will_full_fast_clear is set, the caller promises to fast-clear the
* largest portion of the specified range as it can. For depth images,
* that means the entire image because we don't support multi-LOD HiZ.
@@ -735,50 +556,6 @@ transition_stencil_buffer(struct anv_cmd_buffer *cmd_buffer,
base_level, level_count,
base_layer, layer_count);
}
-#elif GFX_VER == 12
- const uint32_t plane =
- anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT);
- if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE)
- return;
-
- if ((initial_layout == VK_IMAGE_LAYOUT_UNDEFINED ||
- initial_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) &&
- cmd_buffer->device->physical->has_implicit_ccs &&
- cmd_buffer->device->info->has_aux_map) {
- anv_image_init_aux_tt(cmd_buffer, image, VK_IMAGE_ASPECT_STENCIL_BIT,
- base_level, level_count, base_layer, layer_count);
-
- /* If will_full_fast_clear is set, the caller promises to fast-clear the
- * largest portion of the specified range as it can.
- */
- if (will_full_fast_clear)
- return;
-
- for (uint32_t l = 0; l < level_count; l++) {
- const uint32_t level = base_level + l;
- const VkRect2D clear_rect = {
- .offset.x = 0,
- .offset.y = 0,
- .extent.width = anv_minify(image->vk.extent.width, level),
- .extent.height = anv_minify(image->vk.extent.height, level),
- };
-
- uint32_t aux_layers =
- anv_image_aux_layers(image, VK_IMAGE_ASPECT_STENCIL_BIT, level);
- uint32_t level_layer_count =
- MIN2(layer_count, aux_layers - base_layer);
-
- /* From Bspec's 3DSTATE_STENCIL_BUFFER_BODY > Stencil Compression
- * Enable:
- *
- * "When enabled, Stencil Buffer needs to be initialized via
- * stencil clear (HZ_OP) before any renderpass."
- */
- anv_image_hiz_clear(cmd_buffer, image, VK_IMAGE_ASPECT_STENCIL_BIT,
- level, base_layer, level_layer_count,
- clear_rect, 0 /* Stencil clear value */);
- }
- }
#endif
}
@@ -916,7 +693,6 @@ anv_cmd_compute_resolve_predicate(struct anv_cmd_buffer *cmd_buffer,
}
#endif /* GFX_VERx10 >= 75 */
-#if GFX_VER <= 8
static void
anv_cmd_simple_resolve_predicate(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image,
@@ -957,7 +733,6 @@ anv_cmd_simple_resolve_predicate(struct anv_cmd_buffer *cmd_buffer,
mip.CompareOperation = COMPARE_SRCS_EQUAL;
}
}
-#endif /* GFX_VER <= 8 */
static void
anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer *cmd_buffer,
@@ -971,15 +746,9 @@ anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer *cmd_buffer,
{
const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
-#if GFX_VER >= 9
- anv_cmd_compute_resolve_predicate(cmd_buffer, image,
- aspect, level, array_layer,
- resolve_op, fast_clear_supported);
-#else /* GFX_VER <= 8 */
anv_cmd_simple_resolve_predicate(cmd_buffer, image,
aspect, level, array_layer,
resolve_op, fast_clear_supported);
-#endif
/* CCS_D only supports full resolves and BLORP will assert on us if we try
* to do a partial resolve on a CCS_D surface.
@@ -1059,36 +828,22 @@ init_fast_clear_color(struct anv_cmd_buffer *cmd_buffer,
struct anv_address addr =
anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect);
- if (GFX_VER >= 9) {
- const struct isl_device *isl_dev = &cmd_buffer->device->isl_dev;
- const unsigned num_dwords = GFX_VER >= 10 ?
- isl_dev->ss.clear_color_state_size / 4 :
- isl_dev->ss.clear_value_size / 4;
- for (unsigned i = 0; i < num_dwords; i++) {
- anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
- sdi.Address = addr;
- sdi.Address.offset += i * 4;
- sdi.ImmediateData = 0;
- }
- }
- } else {
- anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
- sdi.Address = addr;
- if (GFX_VERx10 >= 75) {
- /* Pre-SKL, the dword containing the clear values also contains
- * other fields, so we need to initialize those fields to match the
- * values that would be in a color attachment.
- */
- sdi.ImmediateData = ISL_CHANNEL_SELECT_RED << 25 |
- ISL_CHANNEL_SELECT_GREEN << 22 |
- ISL_CHANNEL_SELECT_BLUE << 19 |
- ISL_CHANNEL_SELECT_ALPHA << 16;
- } else if (GFX_VER == 7) {
- /* On IVB, the dword containing the clear values also contains
- * other fields that must be zero or can be zero.
- */
- sdi.ImmediateData = 0;
- }
+ anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
+ sdi.Address = addr;
+ if (GFX_VERx10 >= 75) {
+ /* Pre-SKL, the dword containing the clear values also contains
+ * other fields, so we need to initialize those fields to match the
+ * values that would be in a color attachment.
+ */
+ sdi.ImmediateData = ISL_CHANNEL_SELECT_RED << 25 |
+ ISL_CHANNEL_SELECT_GREEN << 22 |
+ ISL_CHANNEL_SELECT_BLUE << 19 |
+ ISL_CHANNEL_SELECT_ALPHA << 16;
+ } else if (GFX_VER == 7) {
+ /* On IVB, the dword containing the clear values also contains
+ * other fields that must be zero or can be zero.
+ */
+ sdi.ImmediateData = 0;
}
}
}
@@ -1321,17 +1076,7 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
}
}
-#if GFX_VER == 12
- if (initial_layout_undefined) {
- if (device->physical->has_implicit_ccs && devinfo->has_aux_map) {
- anv_image_init_aux_tt(cmd_buffer, image, aspect,
- base_level, level_count,
- base_layer, layer_count);
- }
- }
-#else
assert(!(device->physical->has_implicit_ccs && devinfo->has_aux_map));
-#endif
if (must_init_fast_clear_state) {
if (base_level == 0 && base_layer == 0)
@@ -1906,17 +1651,12 @@ genX(CmdExecuteCommands)(
secondary->perf_query_pool == primary->perf_query_pool);
if (secondary->perf_query_pool)
primary->perf_query_pool = secondary->perf_query_pool;
-
-#if GFX_VERx10 == 120
- if (secondary->state.depth_reg_mode != ANV_DEPTH_REG_MODE_UNKNOWN)
- primary->state.depth_reg_mode = secondary->state.depth_reg_mode;
-#endif
}
/* The secondary isn't counted in our VF cache tracking so we need to
* invalidate the whole thing.
*/
- if (GFX_VER >= 8 && GFX_VER <= 9) {
+ if (GFX_VER == 8) {
anv_add_pending_pipe_bits(primary,
ANV_PIPE_CS_STALL_BIT | ANV_PIPE_VF_CACHE_INVALIDATE_BIT,
"Secondary cmd buffer not tracked in VF cache");
@@ -1951,16 +1691,10 @@ void
genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
const struct intel_l3_config *cfg)
{
- assert(cfg || GFX_VER >= 12);
+ assert(cfg);
if (cfg == cmd_buffer->state.current_l3_config)
return;
-#if GFX_VER >= 11
- /* On Gfx11+ we use only one config, so verify it remains the same and skip
- * the stalling programming entirely.
- */
- assert(cfg == cmd_buffer->device->l3_config);
-#else
if (INTEL_DEBUG(DEBUG_L3)) {
mesa_logd("L3 config transition: ");
intel_dump_l3_config(cfg, stderr);
@@ -2011,7 +1745,6 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
}
genX(emit_l3_config)(&cmd_buffer->batch, cmd_buffer->device, cfg);
-#endif /* GFX_VER >= 11 */
cmd_buffer->state.current_l3_config = cfg;
}
@@ -2051,16 +1784,6 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch,
if (bits & ANV_PIPE_FLUSH_BITS)
bits |= ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT;
-
- /* HSD 1209978178: docs say that before programming the aux table:
- *
- * "Driver must ensure that the engine is IDLE but ensure it doesn't
- * add extra flushes in the case it knows that the engine is already
- * IDLE."
- */
- if (GFX_VER == 12 && (bits & ANV_PIPE_AUX_TABLE_INVALIDATE_BIT))
- bits |= ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT;
-
/* If we're going to do an invalidate and we have a pending end-of-pipe
* sync that has yet to be resolved, we do the end-of-pipe sync now.
*/
@@ -2079,50 +1802,14 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch,
*
* The same text exists a few rows below for Post Sync Op.
*/
- if (bits & ANV_PIPE_POST_SYNC_BIT) {
- if (GFX_VER == 9 && current_pipeline == GPGPU)
- bits |= ANV_PIPE_CS_STALL_BIT;
+ if (bits & ANV_PIPE_POST_SYNC_BIT)
bits &= ~ANV_PIPE_POST_SYNC_BIT;
- }
if (bits & (ANV_PIPE_FLUSH_BITS | ANV_PIPE_STALL_BITS |
ANV_PIPE_END_OF_PIPE_SYNC_BIT)) {
anv_batch_emit(batch, GENX(PIPE_CONTROL), pipe) {
-#if GFX_VERx10 >= 125
- /* BSpec 47112: PIPE_CONTROL::Untyped Data-Port Cache Flush:
- *
- * "'HDC Pipeline Flush' bit must be set for this bit to take
- * effect."
- *
- * BSpec 47112: PIPE_CONTROL::HDC Pipeline Flush:
- *
- * "When the "Pipeline Select" mode in PIPELINE_SELECT command is
- * set to "3D", HDC Pipeline Flush can also flush/invalidate the
- * LSC Untyped L1 cache based on the programming of HDC_Chicken0
- * register bits 13:11."
- *
- * "When the 'Pipeline Select' mode is set to 'GPGPU', the LSC
- * Untyped L1 cache flush is controlled by 'Untyped Data-Port
- * Cache Flush' bit in the PIPE_CONTROL command."
- *
- * As part of Wa_1608949956 & Wa_14010198302, i915 is programming
- * HDC_CHICKEN0[11:13] = 0 ("Untyped L1 is flushed, for both 3D
- * Pipecontrol Dataport flush, and UAV coherency barrier event").
- * So there is no need to set "Untyped Data-Port Cache" in 3D
- * mode.
- */
- pipe.UntypedDataPortCacheFlushEnable =
- (bits & ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT) &&
- current_pipeline == GPGPU;
- pipe.HDCPipelineFlushEnable |= pipe.UntypedDataPortCacheFlushEnable;
-#endif
-#if GFX_VER >= 12
- pipe.TileCacheFlushEnable = bits & ANV_PIPE_TILE_CACHE_FLUSH_BIT;
- pipe.HDCPipelineFlushEnable |= bits & ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
-#else
/* Flushing HDC pipeline requires DC Flush on earlier HW. */
pipe.DCFlushEnable |= bits & ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
-#endif
pipe.DepthCacheFlushEnable = bits & ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
pipe.DCFlushEnable |= bits & ANV_PIPE_DATA_CACHE_FLUSH_BIT;
pipe.RenderTargetCacheFlushEnable =
@@ -2131,16 +1818,7 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch,
/* Wa_1409600907: "PIPE_CONTROL with Depth Stall Enable bit must
* be set with any PIPE_CONTROL with Depth Flush Enable bit set.
*/
-#if GFX_VER >= 12
- pipe.DepthStallEnable =
- pipe.DepthCacheFlushEnable || (bits & ANV_PIPE_DEPTH_STALL_BIT);
-#else
pipe.DepthStallEnable = bits & ANV_PIPE_DEPTH_STALL_BIT;
-#endif
-
-#if GFX_VERx10 >= 125
- pipe.PSSStallSyncEnable = bits & ANV_PIPE_PSS_STALL_SYNC_BIT;
-#endif
pipe.CommandStreamerStallEnable = bits & ANV_PIPE_CS_STALL_BIT;
#if GFX_VER == 8
@@ -2271,31 +1949,11 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch,
}
if (bits & ANV_PIPE_INVALIDATE_BITS) {
- /* From the SKL PRM, Vol. 2a, "PIPE_CONTROL",
- *
- * "If the VF Cache Invalidation Enable is set to a 1 in a
- * PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields sets to
- * 0, with the VF Cache Invalidation Enable set to 0 needs to be sent
- * prior to the PIPE_CONTROL with VF Cache Invalidation Enable set to
- * a 1."
- *
- * This appears to hang Broadwell, so we restrict it to just gfx9.
- */
- if (GFX_VER == 9 && (bits & ANV_PIPE_VF_CACHE_INVALIDATE_BIT))
- anv_batch_emit(batch, GENX(PIPE_CONTROL), pipe);
-
anv_batch_emit(batch, GENX(PIPE_CONTROL), pipe) {
pipe.StateCacheInvalidationEnable =
bits & ANV_PIPE_STATE_CACHE_INVALIDATE_BIT;
pipe.ConstantCacheInvalidationEnable =
bits & ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
-#if GFX_VER >= 12
- /* Invalidates the L3 cache part in which index & vertex data is loaded
- * when VERTEX_BUFFER_STATE::L3BypassDisable is set.
- */
- pipe.L3ReadOnlyCacheInvalidationEnable =
- bits & ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
-#endif
pipe.VFCacheInvalidationEnable =
bits & ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
pipe.TextureCacheInvalidationEnable =
@@ -2303,28 +1961,9 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch,
pipe.InstructionCacheInvalidateEnable =
bits & ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT;
- /* From the SKL PRM, Vol. 2a, "PIPE_CONTROL",
- *
- * "When VF Cache Invalidate is set “Post Sync Operation” must be
- * enabled to “Write Immediate Data” or “Write PS Depth Count” or
- * “Write Timestamp”.
- */
- if (GFX_VER == 9 && pipe.VFCacheInvalidationEnable) {
- pipe.PostSyncOperation = WriteImmediateData;
- pipe.Address = device->workaround_address;
- }
anv_debug_dump_pc(pipe);
}
-#if GFX_VER == 12
- if ((bits & ANV_PIPE_AUX_TABLE_INVALIDATE_BIT) && device->info->has_aux_map) {
- anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
- lri.RegisterOffset = GENX(GFX_CCS_AUX_INV_num);
- lri.DataDWord = 1;
- }
- }
-#endif
-
bits &= ~ANV_PIPE_INVALIDATE_BITS;
}
@@ -2346,7 +1985,7 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
if (trace_flush)
trace_intel_begin_stall(&cmd_buffer->trace);
- if ((GFX_VER >= 8 && GFX_VER <= 9) &&
+ if (GFX_VER == 8 &&
(bits & ANV_PIPE_CS_STALL_BIT) &&
(bits & ANV_PIPE_VF_CACHE_INVALIDATE_BIT)) {
/* If we are doing a VF cache invalidate AND a CS stall (it must be
@@ -2512,20 +2151,6 @@ cmd_buffer_alloc_push_constants(struct anv_cmd_buffer *cmd_buffer)
alloc.ConstantBufferSize = push_constant_kb - kb_used;
}
-#if GFX_VERx10 == 125
- /* Wa_22011440098
- *
- * In 3D mode, after programming push constant alloc command immediately
- * program push constant command(ZERO length) without any commit between
- * them.
- */
- if (intel_device_info_is_dg2(cmd_buffer->device->info)) {
- anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_ALL), c) {
- c.MOCS = anv_mocs(cmd_buffer->device, NULL, 0);
- }
- }
-#endif
-
cmd_buffer->state.gfx.push_constant_stages = stages;
/* From the BDW PRM for 3DSTATE_PUSH_CONSTANT_ALLOC_VS:
@@ -3169,9 +2794,7 @@ cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer,
*
* Let's not bother and assume it's all internal.
*/
-#if GFX_VER >= 9
- c.MOCS = mocs;
-#elif GFX_VER < 8
+#if GFX_VER != 8
c.ConstantBody.MOCS = mocs;
#endif
@@ -3230,61 +2853,6 @@ cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer,
}
}
-#if GFX_VER >= 12
-static void
-cmd_buffer_emit_push_constant_all(struct anv_cmd_buffer *cmd_buffer,
- uint32_t shader_mask,
- struct anv_address *buffers,
- uint32_t buffer_count)
-{
- if (buffer_count == 0) {
- anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_ALL), c) {
- c.ShaderUpdateEnable = shader_mask;
- c.MOCS = isl_mocs(&cmd_buffer->device->isl_dev, 0, false);
- }
- return;
- }
-
- const struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;
- const struct anv_graphics_pipeline *pipeline = gfx_state->pipeline;
-
- static const UNUSED uint32_t push_constant_opcodes[] = {
- [MESA_SHADER_VERTEX] = 21,
- [MESA_SHADER_TESS_CTRL] = 25, /* HS */
- [MESA_SHADER_TESS_EVAL] = 26, /* DS */
- [MESA_SHADER_GEOMETRY] = 22,
- [MESA_SHADER_FRAGMENT] = 23,
- };
-
- gl_shader_stage stage = vk_to_mesa_shader_stage(shader_mask);
- assert(stage < ARRAY_SIZE(push_constant_opcodes));
-
- const struct anv_pipeline_bind_map *bind_map =
- &pipeline->shaders[stage]->bind_map;
-
- uint32_t *dw;
- const uint32_t buffer_mask = (1 << buffer_count) - 1;
- const uint32_t num_dwords = 2 + 2 * buffer_count;
-
- dw = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
- GENX(3DSTATE_CONSTANT_ALL),
- .ShaderUpdateEnable = shader_mask,
- .PointerBufferMask = buffer_mask,
- .MOCS = isl_mocs(&cmd_buffer->device->isl_dev, 0, false));
-
- for (int i = 0; i < buffer_count; i++) {
- const struct anv_push_range *range = &bind_map->push_ranges[i];
- GENX(3DSTATE_CONSTANT_ALL_DATA_pack)(
- &cmd_buffer->batch, dw + 2 + i * 2,
- &(struct GENX(3DSTATE_CONSTANT_ALL_DATA)) {
- .PointerToConstantBuffer =
- anv_address_add(buffers[i], range->start * 32),
- .ConstantBufferReadLength = range->length,
- });
- }
-}
-#endif
-
static void
cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer,
VkShaderStageFlags dirty_stages)
@@ -3293,10 +2861,6 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer,
struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;
const struct anv_graphics_pipeline *pipeline = gfx_state->pipeline;
-#if GFX_VER >= 12
- uint32_t nobuffer_stages = 0;
-#endif
-
/* Compute robust pushed register access mask for each stage. */
if (cmd_buffer->device->robust_buffer_access) {
anv_foreach_stage(stage, dirty_stages) {
@@ -3373,34 +2937,9 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer,
assert(bind_map->push_ranges[i].length == 0);
}
-#if GFX_VER >= 12
- /* If this stage doesn't have any push constants, emit it later in a
- * single CONSTANT_ALL packet.
- */
- if (buffer_count == 0) {
- nobuffer_stages |= 1 << stage;
- continue;
- }
-
- /* The Constant Buffer Read Length field from 3DSTATE_CONSTANT_ALL
- * contains only 5 bits, so we can only use it for buffers smaller than
- * 32.
- */
- if (max_push_range < 32) {
- cmd_buffer_emit_push_constant_all(cmd_buffer, 1 << stage,
- buffers, buffer_count);
- continue;
- }
-#endif
-
cmd_buffer_emit_push_constant(cmd_buffer, stage, buffers, buffer_count);
}
-#if GFX_VER >= 12
- if (nobuffer_stages)
- cmd_buffer_emit_push_constant_all(cmd_buffer, nobuffer_stages, NULL, 0);
-#endif
-
cmd_buffer->state.push_constants_dirty &= ~flushed;
}
@@ -3720,8 +3259,6 @@ genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer)
genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->base.l3_config);
- genX(cmd_buffer_emit_hashing_mode)(cmd_buffer, UINT_MAX, UINT_MAX, 1);
-
genX(flush_pipeline_select_3d)(cmd_buffer);
/* Apply any pending pipeline flushes we may have. We want to apply them
@@ -3769,9 +3306,6 @@ genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer)
.BufferPitch = stride,
.BufferStartingAddress = anv_address_add(buffer->address, offset),
.NullVertexBuffer = offset >= buffer->vk.size,
-#if GFX_VER >= 12
- .L3BypassDisable = true,
-#endif
#if GFX_VER >= 8
.BufferSize = size,
@@ -3793,7 +3327,7 @@ genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer)
};
}
-#if GFX_VER >= 8 && GFX_VER <= 9
+#if GFX_VER == 8
genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, vb,
state.BufferStartingAddress,
state.BufferSize);
@@ -3835,12 +3369,7 @@ genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer)
for (unsigned idx = 0; idx < MAX_XFB_BUFFERS; idx++) {
struct anv_xfb_binding *xfb = &cmd_buffer->state.xfb_bindings[idx];
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SO_BUFFER), sob) {
-#if GFX_VER < 12
sob.SOBufferIndex = idx;
-#else
- sob._3DCommandOpcode = 0;
- sob._3DCommandSubOpcode = SO_BUFFER_INDEX_0_CMD + idx;
-#endif
if (cmd_buffer->state.xfb_enabled && xfb->buffer && xfb->size != 0) {
sob.MOCS = anv_mocs(cmd_buffer->device, xfb->buffer->address.bo, 0);
@@ -3865,19 +3394,6 @@ genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer)
}
}
}
-
- if (intel_device_info_is_dg2(cmd_buffer->device->info)) {
- /* Wa_16011411144: also CS_STALL after touching SO_BUFFER change */
- anv_add_pending_pipe_bits(cmd_buffer,
- ANV_PIPE_CS_STALL_BIT,
- "after SO_BUFFER change WA");
- genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
- } else if (GFX_VER >= 10) {
- /* CNL and later require a CS stall after 3DSTATE_SO_BUFFER */
- anv_add_pending_pipe_bits(cmd_buffer,
- ANV_PIPE_CS_STALL_BIT,
- "after 3DSTATE_SO_BUFFER call");
- }
}
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) {
@@ -3999,9 +3515,6 @@ emit_vertex_bo(struct anv_cmd_buffer *cmd_buffer,
.MOCS = anv_mocs(cmd_buffer->device, addr.bo,
ISL_SURF_USAGE_VERTEX_BUFFER_BIT),
.NullVertexBuffer = size == 0,
-#if GFX_VER >= 12
- .L3BypassDisable = true,
-#endif
#if (GFX_VER >= 8)
.BufferStartingAddress = addr,
.BufferSize = size
@@ -5006,7 +4519,6 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
&pipeline->cs, 1);
cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
-#if GFX_VERx10 < 125
uint32_t iface_desc_data_dw[GENX(INTERFACE_DESCRIPTOR_DATA_length)];
struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
.BindingTablePointer =
@@ -5028,21 +4540,18 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
mid.InterfaceDescriptorTotalLength = size;
mid.InterfaceDescriptorDataStartAddress = state.offset;
}
-#endif
}
if (cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_COMPUTE_BIT) {
comp_state->push_data =
anv_cmd_buffer_cs_push_constants(cmd_buffer);
-#if GFX_VERx10 < 125
if (comp_state->push_data.alloc_size) {
anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), curbe) {
curbe.CURBETotalDataLength = comp_state->push_data.alloc_size;
curbe.CURBEDataStartAddress = comp_state->push_data.offset;
}
}
-#endif
cmd_buffer->state.push_constants_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
}
@@ -5101,56 +4610,6 @@ void genX(CmdDispatch)(
genX(CmdDispatchBase)(commandBuffer, 0, 0, 0, x, y, z);
}
-#if GFX_VERx10 >= 125
-
-static inline void
-emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
- const struct anv_compute_pipeline *pipeline, bool indirect,
- const struct brw_cs_prog_data *prog_data,
- uint32_t groupCountX, uint32_t groupCountY,
- uint32_t groupCountZ)
-{
- struct anv_cmd_compute_state *comp_state = &cmd_buffer->state.compute;
- const struct anv_shader_bin *cs_bin = pipeline->cs;
- bool predicate = cmd_buffer->state.conditional_render_enabled;
-
- const struct intel_device_info *devinfo = pipeline->base.device->info;
- const struct brw_cs_dispatch_info dispatch =
- brw_cs_get_dispatch_info(devinfo, prog_data, NULL);
-
- anv_batch_emit(&cmd_buffer->batch, GENX(COMPUTE_WALKER), cw) {
- cw.IndirectParameterEnable = indirect;
- cw.PredicateEnable = predicate;
- cw.SIMDSize = dispatch.simd_size / 16;
- cw.IndirectDataStartAddress = comp_state->push_data.offset;
- cw.IndirectDataLength = comp_state->push_data.alloc_size;
- cw.LocalXMaximum = prog_data->local_size[0] - 1;
- cw.LocalYMaximum = prog_data->local_size[1] - 1;
- cw.LocalZMaximum = prog_data->local_size[2] - 1;
- cw.ThreadGroupIDXDimension = groupCountX;
- cw.ThreadGroupIDYDimension = groupCountY;
- cw.ThreadGroupIDZDimension = groupCountZ;
- cw.ExecutionMask = dispatch.right_mask;
- cw.PostSync.MOCS = anv_mocs(pipeline->base.device, NULL, 0);
-
- cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
- .KernelStartPointer = cs_bin->kernel.offset,
- .SamplerStatePointer =
- cmd_buffer->state.samplers[MESA_SHADER_COMPUTE].offset,
- .BindingTablePointer =
- cmd_buffer->state.binding_tables[MESA_SHADER_COMPUTE].offset,
- .BindingTableEntryCount =
- 1 + MIN2(pipeline->cs->bind_map.surface_count, 30),
- .NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
- .SharedLocalMemorySize = encode_slm_size(GFX_VER,
- prog_data->base.total_shared),
- .NumberOfBarriers = prog_data->uses_barrier,
- };
- }
-}
-
-#else /* #if GFX_VERx10 >= 125 */
-
static inline void
emit_gpgpu_walker(struct anv_cmd_buffer *cmd_buffer,
const struct anv_compute_pipeline *pipeline, bool indirect,
@@ -5182,8 +4641,6 @@ emit_gpgpu_walker(struct anv_cmd_buffer *cmd_buffer,
anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH), msf);
}
-#endif /* #if GFX_VERx10 >= 125 */
-
static inline void
emit_cs_walker(struct anv_cmd_buffer *cmd_buffer,
const struct anv_compute_pipeline *pipeline, bool indirect,
@@ -5191,13 +4648,8 @@ emit_cs_walker(struct anv_cmd_buffer *cmd_buffer,
uint32_t groupCountX, uint32_t groupCountY,
uint32_t groupCountZ)
{
-#if GFX_VERx10 >= 125
- emit_compute_walker(cmd_buffer, pipeline, indirect, prog_data, groupCountX,
- groupCountY, groupCountZ);
-#else
emit_gpgpu_walker(cmd_buffer, pipeline, indirect, prog_data, groupCountX,
groupCountY, groupCountZ);
-#endif
}
void genX(CmdDispatchBase)(
@@ -5365,47 +4817,6 @@ void genX(CmdDispatchIndirect)(
trace_intel_end_compute(&cmd_buffer->trace, 0, 0, 0);
}
-struct anv_state
-genX(cmd_buffer_ray_query_globals)(struct anv_cmd_buffer *cmd_buffer)
-{
-#if GFX_VERx10 >= 125
- struct anv_device *device = cmd_buffer->device;
-
- struct anv_state state =
- anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
- BRW_RT_DISPATCH_GLOBALS_SIZE,
- 64);
- struct brw_rt_scratch_layout layout;
- uint32_t stack_ids_per_dss = 2048; /* TODO: can we use a lower value in
- * some cases?
- */
- brw_rt_compute_scratch_layout(&layout, device->info,
- stack_ids_per_dss, 1 << 10);
-
- struct GFX_RT_DISPATCH_GLOBALS rtdg = {
- .MemBaseAddress = (struct anv_address) {
- /* The ray query HW computes offsets from the top of the buffer, so
- * let the address at the end of the buffer.
- */
- .bo = device->ray_query_bo,
- .offset = device->ray_query_bo->size
- },
- .AsyncRTStackSize = layout.ray_stack_stride / 64,
- .NumDSSRTStacks = layout.stack_ids_per_dss,
- .MaxBVHLevels = BRW_RT_MAX_BVH_LEVELS,
- .Flags = RT_DEPTH_TEST_LESS_EQUAL,
- .ResumeShaderTable = (struct anv_address) {
- .bo = cmd_buffer->state.ray_query_shadow_bo,
- },
- };
- GFX_RT_DISPATCH_GLOBALS_pack(NULL, state.map, &rtdg);
-
- return state;
-#else
- unreachable("Not supported");
-#endif
-}
-
static void
genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer,
uint32_t pipeline)
@@ -5415,7 +4826,7 @@ genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer,
if (cmd_buffer->state.current_pipeline == pipeline)
return;
-#if GFX_VER >= 8 && GFX_VER < 10
+#if GFX_VER >= 8
/* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT:
*
* Software must clear the COLOR_CALC_STATE Valid field in
@@ -5429,30 +4840,6 @@ genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer,
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), t);
#endif
-#if GFX_VER == 9
- if (pipeline == _3D) {
- /* There is a mid-object preemption workaround which requires you to
- * re-emit MEDIA_VFE_STATE after switching from GPGPU to 3D. However,
- * even without preemption, we have issues with geometry flickering when
- * GPGPU and 3D are back-to-back and this seems to fix it. We don't
- * really know why.
- */
- anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_VFE_STATE), vfe) {
- vfe.MaximumNumberofThreads =
- devinfo->max_cs_threads * devinfo->subslice_total - 1;
- vfe.NumberofURBEntries = 2;
- vfe.URBEntryAllocationSize = 2;
- }
-
- /* We just emitted a dummy MEDIA_VFE_STATE so now that packet is
- * invalid. Set the compute pipeline to dirty to force a re-emit of the
- * pipeline in case we get back-to-back dispatch calls with the same
- * pipeline and a PIPELINE_SELECT in between.
- */
- cmd_buffer->state.compute.pipeline_dirty = true;
- }
-#endif
-
/* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
* PIPELINE_SELECT [DevBWR+]":
*
@@ -5480,30 +4867,9 @@ genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer,
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), ps) {
-#if GFX_VER >= 9
- ps.MaskBits = GFX_VER >= 12 ? 0x13 : 3;
- ps.MediaSamplerDOPClockGateEnable = GFX_VER >= 12;
-#endif
ps.PipelineSelection = pipeline;
}
-#if GFX_VER == 9
- if (devinfo->platform == INTEL_PLATFORM_GLK) {
- /* Project: DevGLK
- *
- * "This chicken bit works around a hardware issue with barrier logic
- * encountered when switching between GPGPU and 3D pipelines. To
- * workaround the issue, this mode bit should be set after a pipeline
- * is selected."
- */
- anv_batch_write_reg(&cmd_buffer->batch, GENX(SLICE_COMMON_ECO_CHICKEN1), scec1) {
- scec1.GLKBarrierMode = pipeline == GPGPU ? GLK_BARRIER_MODE_GPGPU
- : GLK_BARRIER_MODE_3D_HULL;
- scec1.GLKBarrierModeMask = 1;
- }
- }
-#endif
-
cmd_buffer->state.current_pipeline = pipeline;
}
@@ -5543,9 +4909,6 @@ genX(cmd_buffer_emit_gfx7_depth_flush)(struct anv_cmd_buffer *cmd_buffer)
}
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
pipe.DepthCacheFlushEnable = true;
-#if GFX_VER >= 12
- pipe.TileCacheFlushEnable = true;
-#endif
anv_debug_dump_pc(pipe);
}
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
@@ -5554,54 +4917,6 @@ genX(cmd_buffer_emit_gfx7_depth_flush)(struct anv_cmd_buffer *cmd_buffer)
}
}
-void
-genX(cmd_buffer_emit_gfx12_depth_wa)(struct anv_cmd_buffer *cmd_buffer,
- const struct isl_surf *surf)
-{
-#if GFX_VERx10 == 120
- const bool is_d16_1x_msaa = surf->format == ISL_FORMAT_R16_UNORM &&
- surf->samples == 1;
-
- switch (cmd_buffer->state.depth_reg_mode) {
- case ANV_DEPTH_REG_MODE_HW_DEFAULT:
- if (!is_d16_1x_msaa)
- return;
- break;
- case ANV_DEPTH_REG_MODE_D16_1X_MSAA:
- if (is_d16_1x_msaa)
- return;
- break;
- case ANV_DEPTH_REG_MODE_UNKNOWN:
- break;
- }
-
- /* We'll change some CHICKEN registers depending on the depth surface
- * format. Do a depth flush and stall so the pipeline is not using these
- * settings while we change the registers.
- */
- anv_add_pending_pipe_bits(cmd_buffer,
- ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
- ANV_PIPE_DEPTH_STALL_BIT |
- ANV_PIPE_END_OF_PIPE_SYNC_BIT,
- "Workaround: Stop pipeline for 14010455700");
- genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
-
- /* Wa_14010455700
- *
- * To avoid sporadic corruptions “Set 0x7010[9] when Depth Buffer
- * Surface Format is D16_UNORM , surface type is not NULL & 1X_MSAA”.
- */
- anv_batch_write_reg(&cmd_buffer->batch, GENX(COMMON_SLICE_CHICKEN1), reg) {
- reg.HIZPlaneOptimizationdisablebit = is_d16_1x_msaa;
- reg.HIZPlaneOptimizationdisablebitMask = true;
- }
-
- cmd_buffer->state.depth_reg_mode =
- is_d16_1x_msaa ? ANV_DEPTH_REG_MODE_D16_1X_MSAA :
- ANV_DEPTH_REG_MODE_HW_DEFAULT;
-#endif
-}
-
/* From the Skylake PRM, 3DSTATE_VERTEX_BUFFERS:
*
* "The VF cache needs to be invalidated before binding and then using
@@ -5635,8 +4950,7 @@ genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(struct anv_cmd_buffer *cmd_buffer
struct anv_address vb_address,
uint32_t vb_size)
{
- if (GFX_VER < 8 || GFX_VER > 9 ||
- anv_use_relocations(cmd_buffer->device->physical))
+ if (GFX_VER < 8 || anv_use_relocations(cmd_buffer->device->physical))
return;
struct anv_vb_cache_range *bound, *dirty;
@@ -5666,8 +4980,7 @@ genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush)(struct anv_cmd_buffer *cmd_b
uint32_t access_type,
uint64_t vb_used)
{
- if (GFX_VER < 8 || GFX_VER > 9 ||
- anv_use_relocations(cmd_buffer->device->physical))
+ if (GFX_VER < 8 || anv_use_relocations(cmd_buffer->device->physical))
return;
if (access_type == RANDOM) {
@@ -5699,97 +5012,6 @@ genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush)(struct anv_cmd_buffer *cmd_b
}
}
-/**
- * Update the pixel hashing modes that determine the balancing of PS threads
- * across subslices and slices.
- *
- * \param width Width bound of the rendering area (already scaled down if \p
- * scale is greater than 1).
- * \param height Height bound of the rendering area (already scaled down if \p
- * scale is greater than 1).
- * \param scale The number of framebuffer samples that could potentially be
- * affected by an individual channel of the PS thread. This is
- * typically one for single-sampled rendering, but for operations
- * like CCS resolves and fast clears a single PS invocation may
- * update a huge number of pixels, in which case a finer
- * balancing is desirable in order to maximally utilize the
- * bandwidth available. UINT_MAX can be used as shorthand for
- * "finest hashing mode available".
- */
-void
-genX(cmd_buffer_emit_hashing_mode)(struct anv_cmd_buffer *cmd_buffer,
- unsigned width, unsigned height,
- unsigned scale)
-{
-#if GFX_VER == 9
- const struct intel_device_info *devinfo = cmd_buffer->device->info;
- const unsigned slice_hashing[] = {
- /* Because all Gfx9 platforms with more than one slice require
- * three-way subslice hashing, a single "normal" 16x16 slice hashing
- * block is guaranteed to suffer from substantial imbalance, with one
- * subslice receiving twice as much work as the other two in the
- * slice.
- *
- * The performance impact of that would be particularly severe when
- * three-way hashing is also in use for slice balancing (which is the
- * case for all Gfx9 GT4 platforms), because one of the slices
- * receives one every three 16x16 blocks in either direction, which
- * is roughly the periodicity of the underlying subslice imbalance
- * pattern ("roughly" because in reality the hardware's
- * implementation of three-way hashing doesn't do exact modulo 3
- * arithmetic, which somewhat decreases the magnitude of this effect
- * in practice). This leads to a systematic subslice imbalance
- * within that slice regardless of the size of the primitive. The
- * 32x32 hashing mode guarantees that the subslice imbalance within a
- * single slice hashing block is minimal, largely eliminating this
- * effect.
- */
- _32x32,
- /* Finest slice hashing mode available. */
- NORMAL
- };
- const unsigned subslice_hashing[] = {
- /* 16x16 would provide a slight cache locality benefit especially
- * visible in the sampler L1 cache efficiency of low-bandwidth
- * non-LLC platforms, but it comes at the cost of greater subslice
- * imbalance for primitives of dimensions approximately intermediate
- * between 16x4 and 16x16.
- */
- _16x4,
- /* Finest subslice hashing mode available. */
- _8x4
- };
- /* Dimensions of the smallest hashing block of a given hashing mode. If
- * the rendering area is smaller than this there can't possibly be any
- * benefit from switching to this mode, so we optimize out the
- * transition.
- */
- const unsigned min_size[][2] = {
- { 16, 4 },
- { 8, 4 }
- };
- const unsigned idx = scale > 1;
-
- if (cmd_buffer->state.current_hash_scale != scale &&
- (width > min_size[idx][0] || height > min_size[idx][1])) {
- anv_add_pending_pipe_bits(cmd_buffer,
- ANV_PIPE_CS_STALL_BIT |
- ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
- "change pixel hash mode");
- genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
-
- anv_batch_write_reg(&cmd_buffer->batch, GENX(GT_MODE), gt) {
- gt.SliceHashing = (devinfo->num_slices > 1 ? slice_hashing[idx] : 0);
- gt.SliceHashingMask = (devinfo->num_slices > 1 ? -1 : 0);
- gt.SubsliceHashing = subslice_hashing[idx];
- gt.SubsliceHashingMask = -1;
- }
-
- cmd_buffer->state.current_hash_scale = scale;
- }
-#endif
-}
-
static void
cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
{
@@ -5891,65 +5113,9 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
isl_emit_depth_stencil_hiz_s(&device->isl_dev, dw, &info);
- if (info.depth_surf)
- genX(cmd_buffer_emit_gfx12_depth_wa)(cmd_buffer, info.depth_surf);
-
- if (GFX_VER >= 12) {
- cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
- genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
-
- /* Wa_1408224581
- *
- * Workaround: Gfx12LP Astep only An additional pipe control with
- * post-sync = store dword operation would be required.( w/a is to
- * have an additional pipe control after the stencil state whenever
- * the surface state bits of this state is changing).
- *
- * This also seems sufficient to handle Wa_14014148106.
- */
- anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
- pc.PostSyncOperation = WriteImmediateData;
- pc.Address = cmd_buffer->device->workaround_address;
- }
- }
cmd_buffer->state.hiz_enabled = isl_aux_usage_has_hiz(info.hiz_usage);
}
-static void
-cmd_buffer_emit_cps_control_buffer(struct anv_cmd_buffer *cmd_buffer,
- const struct anv_image_view *fsr_iview)
-{
-#if GFX_VERx10 >= 125
- struct anv_device *device = cmd_buffer->device;
-
- if (!device->vk.enabled_extensions.KHR_fragment_shading_rate)
- return;
-
- uint32_t *dw = anv_batch_emit_dwords(&cmd_buffer->batch,
- device->isl_dev.cpb.size / 4);
- if (dw == NULL)
- return;
-
- struct isl_cpb_emit_info info = { };
-
- if (fsr_iview) {
- info.view = &fsr_iview->planes[0].isl;
- info.surf = &fsr_iview->image->planes[0].primary_surface.isl;
- info.address =
- anv_batch_emit_reloc(&cmd_buffer->batch,
- dw + device->isl_dev.cpb.offset / 4,
- fsr_iview->image->bindings[0].address.bo,
- fsr_iview->image->bindings[0].address.offset +
- fsr_iview->image->bindings[0].memory_range.offset);
- info.mocs =
- anv_mocs(device, fsr_iview->image->bindings[0].address.bo,
- ISL_SURF_USAGE_CPB_BIT);
- }
-
- isl_emit_cpb_control_s(&device->isl_dev, dw, &info);
-#endif /* GFX_VERx10 >= 125 */
-}
-
static VkImageLayout
attachment_initial_layout(const VkRenderingAttachmentInfo *att)
{
@@ -6178,8 +5344,7 @@ void genX(CmdBeginRendering)(
add_surface_state_relocs(cmd_buffer, gfx->color_att[i].surface_state);
- if (GFX_VER < 10 &&
- (att->loadOp == VK_ATTACHMENT_LOAD_OP_LOAD ||
+ if ((att->loadOp == VK_ATTACHMENT_LOAD_OP_LOAD ||
(gfx->rendering_flags & VK_RENDERING_RESUMING_BIT)) &&
iview->image->planes[0].aux_usage != ISL_AUX_USAGE_NONE &&
iview->planes[0].isl.base_level == 0 &&
@@ -6199,15 +5364,6 @@ void genX(CmdBeginRendering)(
}
}
- const struct anv_image_view *fsr_iview = NULL;
- const VkRenderingFragmentShadingRateAttachmentInfoKHR *fsr_att =
- vk_find_struct_const(pRenderingInfo->pNext,
- RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR);
- if (fsr_att != NULL && fsr_att->imageView != VK_NULL_HANDLE) {
- fsr_iview = anv_image_view_from_handle(fsr_att->imageView);
- /* imageLayout and shadingRateAttachmentTexelSize are ignored */
- }
-
const struct anv_image_view *ds_iview = NULL;
const VkRenderingAttachmentInfo *d_att = pRenderingInfo->pDepthAttachment;
const VkRenderingAttachmentInfo *s_att = pRenderingInfo->pStencilAttachment;
@@ -6448,24 +5604,7 @@ void genX(CmdBeginRendering)(
*/
gfx->dirty |= ANV_CMD_DIRTY_PIPELINE;
-#if GFX_VER >= 11
- /* The PIPE_CONTROL command description says:
- *
- * "Whenever a Binding Table Index (BTI) used by a Render Target Message
- * points to a different RENDER_SURFACE_STATE, SW must issue a Render
- * Target Cache Flush by enabling this bit. When render target flush
- * is set due to new association of BTI, PS Scoreboard Stall bit must
- * be set in this packet."
- */
- anv_add_pending_pipe_bits(cmd_buffer,
- ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
- ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
- "change RT");
-#endif
-
cmd_buffer_emit_depth_stencil(cmd_buffer);
-
- cmd_buffer_emit_cps_control_buffer(cmd_buffer, fsr_iview);
}
static void
@@ -6950,21 +6089,12 @@ VkResult genX(CmdSetPerformanceOverrideINTEL)(
switch (pOverrideInfo->type) {
case VK_PERFORMANCE_OVERRIDE_TYPE_NULL_HARDWARE_INTEL: {
-#if GFX_VER >= 9
- anv_batch_write_reg(&cmd_buffer->batch, GENX(CS_DEBUG_MODE2), csdm2) {
- csdm2._3DRenderingInstructionDisable = pOverrideInfo->enable;
- csdm2.MediaInstructionDisable = pOverrideInfo->enable;
- csdm2._3DRenderingInstructionDisableMask = true;
- csdm2.MediaInstructionDisableMask = true;
- }
-#else
anv_batch_write_reg(&cmd_buffer->batch, GENX(INSTPM), instpm) {
instpm._3DRenderingInstructionDisable = pOverrideInfo->enable;
instpm.MediaInstructionDisable = pOverrideInfo->enable;
instpm._3DRenderingInstructionDisableMask = true;
instpm.MediaInstructionDisableMask = true;
}
-#endif
break;
}
diff --git a/src/intel/vulkan_hasvk/gfx8_cmd_buffer.c b/src/intel/vulkan_hasvk/gfx8_cmd_buffer.c
index 8972a0c73fd..afd29bad80c 100644
--- a/src/intel/vulkan_hasvk/gfx8_cmd_buffer.c
+++ b/src/intel/vulkan_hasvk/gfx8_cmd_buffer.c
@@ -53,29 +53,8 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
pc.DepthCacheFlushEnable = true;
pc.CommandStreamerStallEnable = true;
pc.RenderTargetCacheFlushEnable = true;
-#if GFX_VER >= 12
- pc.TileCacheFlushEnable = true;
-
- /* Wa_1409600907: "PIPE_CONTROL with Depth Stall Enable bit must
- * be set with any PIPE_CONTROL with Depth Flush Enable bit set.
- */
- pc.DepthStallEnable = true;
-#endif
}
-#if GFX_VER == 9
-
- uint32_t cache_mode;
- anv_pack_struct(&cache_mode, GENX(CACHE_MODE_0),
- .STCPMAOptimizationEnable = enable,
- .STCPMAOptimizationEnableMask = true);
- anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
- lri.RegisterOffset = GENX(CACHE_MODE_0_num);
- lri.DataDWord = cache_mode;
- }
-
-#elif GFX_VER == 8
-
uint32_t cache_mode;
anv_pack_struct(&cache_mode, GENX(CACHE_MODE_1),
.NPPMAFixEnable = enable,
@@ -87,8 +66,6 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
lri.DataDWord = cache_mode;
}
-#endif /* GFX_VER == 8 */
-
/* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
* Flush bits is often necessary. We do it regardless because it's easier.
* The render cache flush is also necessary if stencil writes are enabled.
@@ -100,9 +77,6 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
pc.DepthStallEnable = true;
pc.DepthCacheFlushEnable = true;
pc.RenderTargetCacheFlushEnable = true;
-#if GFX_VER >= 12
- pc.TileCacheFlushEnable = true;
-#endif
}
}
@@ -196,122 +170,6 @@ want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer,
wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
}
-UNUSED static bool
-want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer,
- const struct vk_depth_stencil_state *ds)
-{
- if (GFX_VER > 9)
- return false;
- assert(GFX_VER == 9);
-
- /* From the Skylake PRM Vol. 2c CACHE_MODE_1::STC PMA Optimization Enable:
- *
- * Clearing this bit will force the STC cache to wait for pending
- * retirement of pixels at the HZ-read stage and do the STC-test for
- * Non-promoted, R-computed and Computed depth modes instead of
- * postponing the STC-test to RCPFE.
- *
- * STC_TEST_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
- * 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
- *
- * STC_WRITE_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
- * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
- * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
- *
- * COMP_STC_EN = STC_TEST_EN &&
- * 3DSTATE_PS_EXTRA::PixelShaderComputesStencil
- *
- * SW parses the pipeline states to generate the following logical
- * signal indicating if PMA FIX can be enabled.
- *
- * STC_PMA_OPT =
- * 3DSTATE_WM::ForceThreadDispatch != 1 &&
- * !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
- * 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&
- * 3DSTATE_DEPTH_BUFFER::HIZ Enable &&
- * !(3DSTATE_WM::EDSC_Mode == 2) &&
- * 3DSTATE_PS_EXTRA::PixelShaderValid &&
- * !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
- * 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
- * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
- * 3DSTATE_WM_HZ_OP::StencilBufferClear) &&
- * (COMP_STC_EN || STC_WRITE_EN) &&
- * ((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
- * 3DSTATE_WM::ForceKillPix == ON ||
- * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
- * 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
- * 3DSTATE_PS_BLEND::AlphaTestEnable ||
- * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
- * (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
- */
-
- /* These are always true:
- * 3DSTATE_WM::ForceThreadDispatch != 1 &&
- * !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
- */
-
- /* We only enable the PMA fix if we know for certain that HiZ is enabled.
- * If we don't know whether HiZ is enabled or not, we disable the PMA fix
- * and there is no harm.
- *
- * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
- * 3DSTATE_DEPTH_BUFFER::HIZ Enable
- */
- if (!cmd_buffer->state.hiz_enabled)
- return false;
-
- /* We can't possibly know if HiZ is enabled without the depth attachment */
- ASSERTED const struct anv_image_view *d_iview =
- cmd_buffer->state.gfx.depth_att.iview;
- assert(d_iview && d_iview->image->planes[0].aux_usage == ISL_AUX_USAGE_HIZ);
-
- /* 3DSTATE_PS_EXTRA::PixelShaderValid */
- struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
- if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
- return false;
-
- /* !(3DSTATE_WM::EDSC_Mode == 2) */
- const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
- if (wm_prog_data->early_fragment_tests)
- return false;
-
- /* We never use anv_pipeline for HiZ ops so this is trivially true:
- * !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
- * 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
- * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
- * 3DSTATE_WM_HZ_OP::StencilBufferClear)
- */
-
- /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
- * 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
- */
- const bool stc_test_en = ds->stencil.test_enable;
-
- /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
- * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
- * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
- */
- const bool stc_write_en = ds->stencil.write_enable;
-
- /* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */
- const bool comp_stc_en = stc_test_en && wm_prog_data->computed_stencil;
-
- /* COMP_STC_EN || STC_WRITE_EN */
- if (!(comp_stc_en || stc_write_en))
- return false;
-
- /* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
- * 3DSTATE_WM::ForceKillPix == ON ||
- * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
- * 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
- * 3DSTATE_PS_BLEND::AlphaTestEnable ||
- * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
- * (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF)
- */
- return pipeline->kill_pixel ||
- wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
-}
-
void
genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
{
@@ -319,27 +177,17 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
const struct vk_dynamic_graphics_state *dyn =
&cmd_buffer->vk.dynamic_graphics_state;
-#if GFX_VER >= 11
- if (cmd_buffer->device->vk.enabled_extensions.KHR_fragment_shading_rate &&
- BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR))
- genX(emit_shading_rate)(&cmd_buffer->batch, pipeline, &dyn->fsr);
-#endif /* GFX_VER >= 11 */
-
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH)) {
uint32_t sf_dw[GENX(3DSTATE_SF_length)];
struct GENX(3DSTATE_SF) sf = {
GENX(3DSTATE_SF_header),
};
-#if GFX_VER == 8
if (cmd_buffer->device->info->platform == INTEL_PLATFORM_CHV) {
sf.CHVLineWidth = dyn->rs.line.width;
} else {
sf.LineWidth = dyn->rs.line.width;
}
-#else
- sf.LineWidth = dyn->rs.line.width,
-#endif
GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);
anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gfx8.sf);
}
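anv_batch_emit_merge() combines the 3DSTATE_SF dwords packed here from
dynamic state with the ones pre-packed at pipeline-build time. A minimal
sketch of the OR-merge idea (assumed semantics, not the driver's exact
macro):

   /* Each field occupies disjoint bits and unset fields pack to zero,
    * so two partially-packed dword arrays merge with a plain OR. */
   uint32_t merged[GENX(3DSTATE_SF_length)];
   for (uint32_t i = 0; i < GENX(3DSTATE_SF_length); i++)
      merged[i] = sf_dw[i] | pipeline->gfx8.sf[i];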
@@ -394,7 +242,6 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
* across different state packets for gfx8 and gfx9. We handle that by
* using a big old #if switch here.
*/
-#if GFX_VER == 8
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
struct anv_state cc_state =
@@ -462,87 +309,6 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
const bool pma = want_depth_pma_fix(cmd_buffer, &opt_ds);
genX(cmd_buffer_enable_pma_fix)(cmd_buffer, pma);
}
-#else
- if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
- struct anv_state cc_state =
- anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
- GENX(COLOR_CALC_STATE_length) * 4,
- 64);
- struct GENX(COLOR_CALC_STATE) cc = {
- .BlendConstantColorRed = dyn->cb.blend_constants[0],
- .BlendConstantColorGreen = dyn->cb.blend_constants[1],
- .BlendConstantColorBlue = dyn->cb.blend_constants[2],
- .BlendConstantColorAlpha = dyn->cb.blend_constants[3],
- };
- GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
-
- anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
- ccp.ColorCalcStatePointer = cc_state.offset;
- ccp.ColorCalcStatePointerValid = true;
- }
- }
-
- if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
- ANV_CMD_DIRTY_RENDER_TARGETS)) ||
- BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) ||
- BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) ||
- BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) ||
- BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
- BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
- BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
- BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) ||
- BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE)) {
- VkImageAspectFlags ds_aspects = 0;
- if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED)
- ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
- if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED)
- ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
-
- struct vk_depth_stencil_state opt_ds = dyn->ds;
- vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);
-
- anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) {
- ds.DoubleSidedStencilEnable = true;
-
- ds.StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff;
- ds.StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff;
-
- ds.BackfaceStencilTestMask = opt_ds.stencil.back.compare_mask & 0xff;
- ds.BackfaceStencilWriteMask = opt_ds.stencil.back.write_mask & 0xff;
-
- ds.StencilReferenceValue = opt_ds.stencil.front.reference & 0xff;
- ds.BackfaceStencilReferenceValue = opt_ds.stencil.back.reference & 0xff;
-
- ds.DepthTestEnable = opt_ds.depth.test_enable;
- ds.DepthBufferWriteEnable = opt_ds.depth.write_enable;
- ds.DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op];
- ds.StencilTestEnable = opt_ds.stencil.test_enable;
- ds.StencilBufferWriteEnable = opt_ds.stencil.write_enable;
- ds.StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail];
- ds.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass];
- ds.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail];
- ds.StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare];
- ds.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail];
- ds.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass];
- ds.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail];
- ds.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare];
- }
-
- const bool pma = want_stencil_pma_fix(cmd_buffer, &opt_ds);
- genX(cmd_buffer_enable_pma_fix)(cmd_buffer, pma);
- }
-#endif
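Both the surviving gfx8 branch and this removed gfx9+ branch funnel the
dynamic state through vk_optimize_depth_stencil_state() before packing.
The assumed idea, simplified (sketch, not the runtime's exact logic):

   /* Canonicalize D/S state against the bound aspects so tests that
    * cannot affect the result read back as disabled. */
   if (!(ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT)) {
      opt_ds.stencil.test_enable = false;
      opt_ds.stencil.write_enable = false;
   }
   if (!(ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) {
      opt_ds.depth.test_enable = false;
      opt_ds.depth.write_enable = false;
   }

This keeps the PMA-fix heuristics from tripping on state that is nominally
enabled but cannot apply to the current attachments.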
-
-#if GFX_VER >= 12
- if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE) ||
- BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_BOUNDS)) {
- anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
- db.DepthBoundsTestEnable = dyn->ds.depth.bounds_test.enable;
- db.DepthBoundsTestMinValue = dyn->ds.depth.bounds_test.min;
- db.DepthBoundsTestMaxValue = dyn->ds.depth.bounds_test.max;
- }
- }
-#endif
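3DSTATE_DEPTH_BOUNDS only exists behind the GFX_VER >= 12 guard, so the
depth-bounds path leaves with the rest of the gfx9+ support. For reference,
the dynamic state it consumed is set app-side with the core entry point:

   /* Core Vulkan 1.0; values land in dyn->ds.depth.bounds_test. */
   vkCmdSetDepthBounds(vk_cmd, 0.0f, 1.0f);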
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_LINE_STIPPLE), ls) {
@@ -557,9 +323,6 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
ANV_CMD_DIRTY_INDEX_BUFFER)) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) {
-#if GFX_VERx10 >= 125
- vf.GeometryDistributionEnable = true;
-#endif
vf.IndexedDrawCutIndexEnable = dyn->ia.primitive_restart_enable;
vf.CutIndex = cmd_buffer->state.gfx.restart_index;
}
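vf.CutIndex is derived from the index type bound at vkCmdBindIndexBuffer
time; per the Vulkan primitive-restart rules, the comparison value is all
bits set for that type. A sketch of the mapping (hypothetical helper, not
the driver's):

   static uint32_t
   restart_index_for(VkIndexType type)
   {
      switch (type) {
      case VK_INDEX_TYPE_UINT16: return 0xffff;
      case VK_INDEX_TYPE_UINT32: return 0xffffffff;
      default: unreachable("unsupported index type"); /* util/macros.h */
      }
   }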
@@ -573,46 +336,12 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
ib.MOCS = anv_mocs(cmd_buffer->device,
buffer->address.bo,
ISL_SURF_USAGE_INDEX_BUFFER_BIT);
-#if GFX_VER >= 12
- ib.L3BypassDisable = true;
-#endif
ib.BufferStartingAddress = anv_address_add(buffer->address, offset);
ib.BufferSize = vk_buffer_range(&buffer->vk, offset,
VK_WHOLE_SIZE);
}
}
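vk_buffer_range() resolves the VK_WHOLE_SIZE sentinel against the bound
buffer. Assumed semantics, following the Vulkan binding rules:

   /* Sketch: a VK_WHOLE_SIZE range means "from offset to end of buffer". */
   static VkDeviceSize
   resolve_range(VkDeviceSize buffer_size, VkDeviceSize offset,
                 VkDeviceSize range)
   {
      assert(offset <= buffer_size);
      return range == VK_WHOLE_SIZE ? buffer_size - offset : range;
   }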
-#if GFX_VERx10 >= 125
- if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
- BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) {
- anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VFG), vfg) {
- /* If 3DSTATE_TE::TE Enable == 1 then RR_STRICT else RR_FREE */
- vfg.DistributionMode =
- anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL) ? RR_STRICT :
- RR_FREE;
- vfg.DistributionGranularity = BatchLevelGranularity;
- /* Wa_14014890652 */
- if (intel_device_info_is_dg2(cmd_buffer->device->info))
- vfg.GranularityThresholdDisable = 1;
- vfg.ListCutIndexEnable = dyn->ia.primitive_restart_enable;
- /* 192 vertices for TRILIST_ADJ */
- vfg.ListNBatchSizeScale = 0;
- /* Batch size of 384 vertices */
- vfg.List3BatchSizeScale = 2;
- /* Batch size of 128 vertices */
- vfg.List2BatchSizeScale = 1;
- /* Batch size of 128 vertices */
- vfg.List1BatchSizeScale = 2;
- /* Batch size of 256 vertices for STRIP topologies */
- vfg.StripBatchSizeScale = 3;
- /* 192 control points for PATCHLIST_3 */
- vfg.PatchBatchSizeScale = 1;
- /* 192 control points for PATCHLIST_3 */
- vfg.PatchBatchSizeMultiplier = 31;
- }
- }
-#endif
-
if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations &&
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS))
genX(emit_sample_pattern)(&cmd_buffer->batch, dyn->ms.sample_locations);
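emit_sample_pattern() consumes VK_EXT_sample_locations state, guarded here
on the extension being enabled. App-side, that state arrives via the
extension's command (standard API, shown for context):

   const VkSampleLocationEXT locs[2] = { { 0.25f, 0.25f }, { 0.75f, 0.75f } };
   const VkSampleLocationsInfoEXT info = {
      .sType = VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT,
      .sampleLocationsPerPixel = VK_SAMPLE_COUNT_2_BIT,
      .sampleLocationGridSize = { 1, 1 },
      .sampleLocationsCount = 2,
      .pSampleLocations = locs,
   };
   vkCmdSetSampleLocationsEXT(vk_cmd, &info);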