diff options
author | Jason Ekstrand <jason.ekstrand@collabora.com> | 2022-09-02 22:15:48 -0500 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2022-12-02 09:18:16 +0000 |
commit | 91090e39af0d00d4fc99b7d2bd680bcac9ce0fb9 (patch) | |
tree | 0fab94f64c1104c465083962873ec13635449756 /src/intel | |
parent | 0626b68c88df50e30e61e9fd2ba3e46144ff9ad5 (diff) |
hasvk/cmd_buffer: Rip out SKL+ support
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19852>
Diffstat (limited to 'src/intel')
-rw-r--r-- | src/intel/vulkan_hasvk/anv_genX.h | 7 | ||||
-rw-r--r-- | src/intel/vulkan_hasvk/genX_blorp_exec.c | 8 | ||||
-rw-r--r-- | src/intel/vulkan_hasvk/genX_cmd_buffer.c | 924 | ||||
-rw-r--r-- | src/intel/vulkan_hasvk/gfx8_cmd_buffer.c | 271 |
4 files changed, 27 insertions, 1183 deletions
diff --git a/src/intel/vulkan_hasvk/anv_genX.h b/src/intel/vulkan_hasvk/anv_genX.h index 2f996ae8a12..27bf53e3487 100644 --- a/src/intel/vulkan_hasvk/anv_genX.h +++ b/src/intel/vulkan_hasvk/anv_genX.h @@ -64,9 +64,6 @@ void genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer); void genX(cmd_buffer_emit_gfx7_depth_flush)(struct anv_cmd_buffer *cmd_buffer); -void genX(cmd_buffer_emit_gfx12_depth_wa)(struct anv_cmd_buffer *cmd_buffer, - const struct isl_surf *surf); - void genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(struct anv_cmd_buffer *cmd_buffer, int vb_index, struct anv_address vb_address, @@ -75,10 +72,6 @@ void genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush)(struct anv_cmd_buffer * uint32_t access_type, uint64_t vb_used); -void genX(cmd_buffer_emit_hashing_mode)(struct anv_cmd_buffer *cmd_buffer, - unsigned width, unsigned height, - unsigned scale); - void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer); void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer); diff --git a/src/intel/vulkan_hasvk/genX_blorp_exec.c b/src/intel/vulkan_hasvk/genX_blorp_exec.c index 198edcc83f1..819c3f6d192 100644 --- a/src/intel/vulkan_hasvk/genX_blorp_exec.c +++ b/src/intel/vulkan_hasvk/genX_blorp_exec.c @@ -282,10 +282,6 @@ blorp_exec_on_render(struct blorp_batch *batch, struct anv_cmd_buffer *cmd_buffer = batch->driver_batch; assert(cmd_buffer->queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT); - const unsigned scale = params->fast_clear_op ? 
UINT_MAX : 1; - genX(cmd_buffer_emit_hashing_mode)(cmd_buffer, params->x1 - params->x0, - params->y1 - params->y0, scale); - #if GFX_VER >= 11 /* The PIPE_CONTROL command description says: * @@ -301,10 +297,6 @@ blorp_exec_on_render(struct blorp_batch *batch, "before blorp BTI change"); #endif - if (params->depth.enabled && - !(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL)) - genX(cmd_buffer_emit_gfx12_depth_wa)(cmd_buffer, &params->depth.surf); - genX(flush_pipeline_select_3d)(cmd_buffer); /* Apply any outstanding flushes in case pipeline select haven't. */ diff --git a/src/intel/vulkan_hasvk/genX_cmd_buffer.c b/src/intel/vulkan_hasvk/genX_cmd_buffer.c index dbb4e763004..228893454f3 100644 --- a/src/intel/vulkan_hasvk/genX_cmd_buffer.c +++ b/src/intel/vulkan_hasvk/genX_cmd_buffer.c @@ -61,13 +61,6 @@ convert_pc_to_bits(struct GENX(PIPE_CONTROL) *pc) { enum anv_pipe_bits bits = 0; bits |= (pc->DepthCacheFlushEnable) ? ANV_PIPE_DEPTH_CACHE_FLUSH_BIT : 0; bits |= (pc->DCFlushEnable) ? ANV_PIPE_DATA_CACHE_FLUSH_BIT : 0; -#if GFX_VERx10 >= 125 - bits |= (pc->PSSStallSyncEnable) ? ANV_PIPE_PSS_STALL_SYNC_BIT : 0; -#endif -#if GFX_VER >= 12 - bits |= (pc->TileCacheFlushEnable) ? ANV_PIPE_TILE_CACHE_FLUSH_BIT : 0; - bits |= (pc->HDCPipelineFlushEnable) ? ANV_PIPE_HDC_PIPELINE_FLUSH_BIT : 0; -#endif bits |= (pc->RenderTargetCacheFlushEnable) ? ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT : 0; bits |= (pc->VFCacheInvalidationEnable) ? ANV_PIPE_VF_CACHE_INVALIDATE_BIT : 0; bits |= (pc->StateCacheInvalidationEnable) ? ANV_PIPE_STATE_CACHE_INVALIDATE_BIT : 0; @@ -77,9 +70,6 @@ convert_pc_to_bits(struct GENX(PIPE_CONTROL) *pc) { bits |= (pc->StallAtPixelScoreboard) ? ANV_PIPE_STALL_AT_SCOREBOARD_BIT : 0; bits |= (pc->DepthStallEnable) ? ANV_PIPE_DEPTH_STALL_BIT : 0; bits |= (pc->CommandStreamerStallEnable) ? ANV_PIPE_CS_STALL_BIT : 0; -#if GFX_VERx10 == 125 - bits |= (pc->UntypedDataPortCacheFlushEnable) ? 
ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT : 0; -#endif return bits; } @@ -108,19 +98,6 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) */ cmd_buffer->state.descriptors_dirty |= ~0; -#if GFX_VERx10 >= 125 - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { - pc.CommandStreamerStallEnable = true; - anv_debug_dump_pc(pc); - } - anv_batch_emit( - &cmd_buffer->batch, GENX(3DSTATE_BINDING_TABLE_POOL_ALLOC), btpa) { - btpa.BindingTablePoolBaseAddress = - anv_cmd_buffer_surface_base_address(cmd_buffer); - btpa.BindingTablePoolBufferSize = BINDING_TABLE_POOL_BLOCK_SIZE / 4096; - btpa.MOCS = mocs; - } -#else /* GFX_VERx10 < 125 */ /* Emit a render target cache flush. * * This isn't documented anywhere in the PRM. However, it seems to be @@ -129,26 +106,12 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) * clear depth, reset state base address, and then go render stuff. */ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { -#if GFX_VER >= 12 - pc.HDCPipelineFlushEnable = true; -#else pc.DCFlushEnable = true; -#endif pc.RenderTargetCacheFlushEnable = true; pc.CommandStreamerStallEnable = true; anv_debug_dump_pc(pc); } -#if GFX_VERx10 == 120 - /* Wa_1607854226: - * - * Workaround the non pipelined state not applying in MEDIA/GPGPU pipeline - * mode by putting the pipeline temporarily in 3D mode. 
- */ - uint32_t gfx12_wa_pipeline = cmd_buffer->state.current_pipeline; - genX(flush_pipeline_select_3d)(cmd_buffer); -#endif - anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS), sba) { sba.GeneralStateBaseAddress = (struct anv_address) { NULL, 0 }; sba.GeneralStateMOCS = mocs; @@ -214,35 +177,8 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) (struct anv_address) { .bo = NULL, .offset = 0xfffff000 }; sba.InstructionAccessUpperBoundModifyEnable = true; # endif -# if (GFX_VER >= 9) - sba.BindlessSurfaceStateBaseAddress = - (struct anv_address) { device->surface_state_pool.block_pool.bo, 0 }; - sba.BindlessSurfaceStateSize = (1 << 20) - 1; - sba.BindlessSurfaceStateMOCS = mocs; - sba.BindlessSurfaceStateBaseAddressModifyEnable = true; -# endif -# if (GFX_VER >= 10) - sba.BindlessSamplerStateBaseAddress = (struct anv_address) { NULL, 0 }; - sba.BindlessSamplerStateMOCS = mocs; - sba.BindlessSamplerStateBaseAddressModifyEnable = true; - sba.BindlessSamplerStateBufferSize = 0; -# endif -#if GFX_VERx10 >= 125 - sba.L1CacheControl = L1CC_WB; -#endif } -#if GFX_VERx10 == 120 - /* Wa_1607854226: - * - * Put the pipeline back into its current mode. - */ - if (gfx12_wa_pipeline != UINT32_MAX) - genX(flush_pipeline_select)(cmd_buffer, gfx12_wa_pipeline); -#endif - -#endif /* GFX_VERx10 < 125 */ - /* After re-setting the surface state base address, we have to do some * cache flushing so that the sampler engine will pick up the new * SURFACE_STATE objects and binding tables. 
From the Broadwell PRM, @@ -290,9 +226,6 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) pc.TextureCacheInvalidationEnable = true; pc.ConstantCacheInvalidationEnable = true; pc.StateCacheInvalidationEnable = true; -#if GFX_VERx10 == 125 - pc.InstructionCacheInvalidateEnable = true; -#endif anv_debug_dump_pc(pc); } } @@ -416,8 +349,7 @@ anv_can_fast_clear_color_view(struct anv_device * device, return false; /* On Broadwell and earlier, we can only handle 0/1 clear colors */ - if (GFX_VER <= 8 && - !isl_color_value_is_zero_one(clear_color, iview->planes[0].isl.format)) + if (!isl_color_value_is_zero_one(clear_color, iview->planes[0].isl.format)) return false; /* If the clear color is one that would require non-trivial format @@ -513,107 +445,6 @@ anv_can_hiz_clear_ds_view(struct anv_device *device, #define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x)) -#if GFX_VER == 12 -static void -anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer, - const struct anv_image *image, - VkImageAspectFlagBits aspect, - uint32_t base_level, uint32_t level_count, - uint32_t base_layer, uint32_t layer_count) -{ - const uint32_t plane = anv_image_aspect_to_plane(image, aspect); - - const struct anv_surface *surface = &image->planes[plane].primary_surface; - uint64_t base_address = - anv_address_physical(anv_image_address(image, &surface->memory_range)); - - const struct isl_surf *isl_surf = &image->planes[plane].primary_surface.isl; - uint64_t format_bits = intel_aux_map_format_bits_for_isl_surf(isl_surf); - - /* We're about to live-update the AUX-TT. We really don't want anyone else - * trying to read it while we're doing this. We could probably get away - * with not having this stall in some cases if we were really careful but - * it's better to play it safe. Full stall the GPU. 
- */ - anv_add_pending_pipe_bits(cmd_buffer, - ANV_PIPE_END_OF_PIPE_SYNC_BIT, - "before update AUX-TT"); - genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); - - struct mi_builder b; - mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch); - - for (uint32_t a = 0; a < layer_count; a++) { - const uint32_t layer = base_layer + a; - - uint64_t start_offset_B = UINT64_MAX, end_offset_B = 0; - for (uint32_t l = 0; l < level_count; l++) { - const uint32_t level = base_level + l; - - uint32_t logical_array_layer, logical_z_offset_px; - if (image->vk.image_type == VK_IMAGE_TYPE_3D) { - logical_array_layer = 0; - - /* If the given miplevel does not have this layer, then any higher - * miplevels won't either because miplevels only get smaller the - * higher the LOD. - */ - assert(layer < image->vk.extent.depth); - if (layer >= anv_minify(image->vk.extent.depth, level)) - break; - logical_z_offset_px = layer; - } else { - assert(layer < image->vk.array_layers); - logical_array_layer = layer; - logical_z_offset_px = 0; - } - - uint64_t slice_start_offset_B, slice_end_offset_B; - isl_surf_get_image_range_B_tile(isl_surf, level, - logical_array_layer, - logical_z_offset_px, - &slice_start_offset_B, - &slice_end_offset_B); - - start_offset_B = MIN2(start_offset_B, slice_start_offset_B); - end_offset_B = MAX2(end_offset_B, slice_end_offset_B); - } - - /* Aux operates 64K at a time */ - start_offset_B = align_down_u64(start_offset_B, 64 * 1024); - end_offset_B = align_u64(end_offset_B, 64 * 1024); - - for (uint64_t offset = start_offset_B; - offset < end_offset_B; offset += 64 * 1024) { - uint64_t address = base_address + offset; - - uint64_t aux_entry_addr64, *aux_entry_map; - aux_entry_map = intel_aux_map_get_entry(cmd_buffer->device->aux_map_ctx, - address, &aux_entry_addr64); - - assert(!anv_use_relocations(cmd_buffer->device->physical)); - struct anv_address aux_entry_address = { - .bo = NULL, - .offset = aux_entry_addr64, - }; - - const uint64_t old_aux_entry = 
READ_ONCE(*aux_entry_map); - uint64_t new_aux_entry = - (old_aux_entry & INTEL_AUX_MAP_ADDRESS_MASK) | format_bits; - - if (isl_aux_usage_has_ccs(image->planes[plane].aux_usage)) - new_aux_entry |= INTEL_AUX_MAP_ENTRY_VALID_BIT; - - mi_store(&b, mi_mem64(aux_entry_address), mi_imm(new_aux_entry)); - } - } - - anv_add_pending_pipe_bits(cmd_buffer, - ANV_PIPE_AUX_TABLE_INVALIDATE_BIT, - "after update AUX-TT"); -} -#endif /* GFX_VER == 12 */ - /* Transitions a HiZ-enabled depth buffer from one layout to another. Unless * the initial layout is undefined, the HiZ buffer and depth buffer will * represent the same data at the end of this operation. @@ -631,16 +462,6 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer, if (image->planes[depth_plane].aux_usage == ISL_AUX_USAGE_NONE) return; -#if GFX_VER == 12 - if ((initial_layout == VK_IMAGE_LAYOUT_UNDEFINED || - initial_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) && - cmd_buffer->device->physical->has_implicit_ccs && - cmd_buffer->device->info->has_aux_map) { - anv_image_init_aux_tt(cmd_buffer, image, VK_IMAGE_ASPECT_DEPTH_BIT, - 0, 1, base_layer, layer_count); - } -#endif - /* If will_full_fast_clear is set, the caller promises to fast-clear the * largest portion of the specified range as it can. For depth images, * that means the entire image because we don't support multi-LOD HiZ. 
@@ -735,50 +556,6 @@ transition_stencil_buffer(struct anv_cmd_buffer *cmd_buffer, base_level, level_count, base_layer, layer_count); } -#elif GFX_VER == 12 - const uint32_t plane = - anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT); - if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE) - return; - - if ((initial_layout == VK_IMAGE_LAYOUT_UNDEFINED || - initial_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) && - cmd_buffer->device->physical->has_implicit_ccs && - cmd_buffer->device->info->has_aux_map) { - anv_image_init_aux_tt(cmd_buffer, image, VK_IMAGE_ASPECT_STENCIL_BIT, - base_level, level_count, base_layer, layer_count); - - /* If will_full_fast_clear is set, the caller promises to fast-clear the - * largest portion of the specified range as it can. - */ - if (will_full_fast_clear) - return; - - for (uint32_t l = 0; l < level_count; l++) { - const uint32_t level = base_level + l; - const VkRect2D clear_rect = { - .offset.x = 0, - .offset.y = 0, - .extent.width = anv_minify(image->vk.extent.width, level), - .extent.height = anv_minify(image->vk.extent.height, level), - }; - - uint32_t aux_layers = - anv_image_aux_layers(image, VK_IMAGE_ASPECT_STENCIL_BIT, level); - uint32_t level_layer_count = - MIN2(layer_count, aux_layers - base_layer); - - /* From Bspec's 3DSTATE_STENCIL_BUFFER_BODY > Stencil Compression - * Enable: - * - * "When enabled, Stencil Buffer needs to be initialized via - * stencil clear (HZ_OP) before any renderpass." 
- */ - anv_image_hiz_clear(cmd_buffer, image, VK_IMAGE_ASPECT_STENCIL_BIT, - level, base_layer, level_layer_count, - clear_rect, 0 /* Stencil clear value */); - } - } #endif } @@ -916,7 +693,6 @@ anv_cmd_compute_resolve_predicate(struct anv_cmd_buffer *cmd_buffer, } #endif /* GFX_VERx10 >= 75 */ -#if GFX_VER <= 8 static void anv_cmd_simple_resolve_predicate(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, @@ -957,7 +733,6 @@ anv_cmd_simple_resolve_predicate(struct anv_cmd_buffer *cmd_buffer, mip.CompareOperation = COMPARE_SRCS_EQUAL; } } -#endif /* GFX_VER <= 8 */ static void anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer *cmd_buffer, @@ -971,15 +746,9 @@ anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer *cmd_buffer, { const uint32_t plane = anv_image_aspect_to_plane(image, aspect); -#if GFX_VER >= 9 - anv_cmd_compute_resolve_predicate(cmd_buffer, image, - aspect, level, array_layer, - resolve_op, fast_clear_supported); -#else /* GFX_VER <= 8 */ anv_cmd_simple_resolve_predicate(cmd_buffer, image, aspect, level, array_layer, resolve_op, fast_clear_supported); -#endif /* CCS_D only supports full resolves and BLORP will assert on us if we try * to do a partial resolve on a CCS_D surface. @@ -1059,36 +828,22 @@ init_fast_clear_color(struct anv_cmd_buffer *cmd_buffer, struct anv_address addr = anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect); - if (GFX_VER >= 9) { - const struct isl_device *isl_dev = &cmd_buffer->device->isl_dev; - const unsigned num_dwords = GFX_VER >= 10 ? 
- isl_dev->ss.clear_color_state_size / 4 : - isl_dev->ss.clear_value_size / 4; - for (unsigned i = 0; i < num_dwords; i++) { - anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) { - sdi.Address = addr; - sdi.Address.offset += i * 4; - sdi.ImmediateData = 0; - } - } - } else { - anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) { - sdi.Address = addr; - if (GFX_VERx10 >= 75) { - /* Pre-SKL, the dword containing the clear values also contains - * other fields, so we need to initialize those fields to match the - * values that would be in a color attachment. - */ - sdi.ImmediateData = ISL_CHANNEL_SELECT_RED << 25 | - ISL_CHANNEL_SELECT_GREEN << 22 | - ISL_CHANNEL_SELECT_BLUE << 19 | - ISL_CHANNEL_SELECT_ALPHA << 16; - } else if (GFX_VER == 7) { - /* On IVB, the dword containing the clear values also contains - * other fields that must be zero or can be zero. - */ - sdi.ImmediateData = 0; - } + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) { + sdi.Address = addr; + if (GFX_VERx10 >= 75) { + /* Pre-SKL, the dword containing the clear values also contains + * other fields, so we need to initialize those fields to match the + * values that would be in a color attachment. + */ + sdi.ImmediateData = ISL_CHANNEL_SELECT_RED << 25 | + ISL_CHANNEL_SELECT_GREEN << 22 | + ISL_CHANNEL_SELECT_BLUE << 19 | + ISL_CHANNEL_SELECT_ALPHA << 16; + } else if (GFX_VER == 7) { + /* On IVB, the dword containing the clear values also contains + * other fields that must be zero or can be zero. 
+ */ + sdi.ImmediateData = 0; } } } @@ -1321,17 +1076,7 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, } } -#if GFX_VER == 12 - if (initial_layout_undefined) { - if (device->physical->has_implicit_ccs && devinfo->has_aux_map) { - anv_image_init_aux_tt(cmd_buffer, image, aspect, - base_level, level_count, - base_layer, layer_count); - } - } -#else assert(!(device->physical->has_implicit_ccs && devinfo->has_aux_map)); -#endif if (must_init_fast_clear_state) { if (base_level == 0 && base_layer == 0) @@ -1906,17 +1651,12 @@ genX(CmdExecuteCommands)( secondary->perf_query_pool == primary->perf_query_pool); if (secondary->perf_query_pool) primary->perf_query_pool = secondary->perf_query_pool; - -#if GFX_VERx10 == 120 - if (secondary->state.depth_reg_mode != ANV_DEPTH_REG_MODE_UNKNOWN) - primary->state.depth_reg_mode = secondary->state.depth_reg_mode; -#endif } /* The secondary isn't counted in our VF cache tracking so we need to * invalidate the whole thing. */ - if (GFX_VER >= 8 && GFX_VER <= 9) { + if (GFX_VER == 8) { anv_add_pending_pipe_bits(primary, ANV_PIPE_CS_STALL_BIT | ANV_PIPE_VF_CACHE_INVALIDATE_BIT, "Secondary cmd buffer not tracked in VF cache"); @@ -1951,16 +1691,10 @@ void genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, const struct intel_l3_config *cfg) { - assert(cfg || GFX_VER >= 12); + assert(cfg); if (cfg == cmd_buffer->state.current_l3_config) return; -#if GFX_VER >= 11 - /* On Gfx11+ we use only one config, so verify it remains the same and skip - * the stalling programming entirely. 
- */ - assert(cfg == cmd_buffer->device->l3_config); -#else if (INTEL_DEBUG(DEBUG_L3)) { mesa_logd("L3 config transition: "); intel_dump_l3_config(cfg, stderr); @@ -2011,7 +1745,6 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, } genX(emit_l3_config)(&cmd_buffer->batch, cmd_buffer->device, cfg); -#endif /* GFX_VER >= 11 */ cmd_buffer->state.current_l3_config = cfg; } @@ -2051,16 +1784,6 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch, if (bits & ANV_PIPE_FLUSH_BITS) bits |= ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT; - - /* HSD 1209978178: docs say that before programming the aux table: - * - * "Driver must ensure that the engine is IDLE but ensure it doesn't - * add extra flushes in the case it knows that the engine is already - * IDLE." - */ - if (GFX_VER == 12 && (bits & ANV_PIPE_AUX_TABLE_INVALIDATE_BIT)) - bits |= ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT; - /* If we're going to do an invalidate and we have a pending end-of-pipe * sync that has yet to be resolved, we do the end-of-pipe sync now. */ @@ -2079,50 +1802,14 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch, * * The same text exists a few rows below for Post Sync Op. */ - if (bits & ANV_PIPE_POST_SYNC_BIT) { - if (GFX_VER == 9 && current_pipeline == GPGPU) - bits |= ANV_PIPE_CS_STALL_BIT; + if (bits & ANV_PIPE_POST_SYNC_BIT) bits &= ~ANV_PIPE_POST_SYNC_BIT; - } if (bits & (ANV_PIPE_FLUSH_BITS | ANV_PIPE_STALL_BITS | ANV_PIPE_END_OF_PIPE_SYNC_BIT)) { anv_batch_emit(batch, GENX(PIPE_CONTROL), pipe) { -#if GFX_VERx10 >= 125 - /* BSpec 47112: PIPE_CONTROL::Untyped Data-Port Cache Flush: - * - * "'HDC Pipeline Flush' bit must be set for this bit to take - * effect." - * - * BSpec 47112: PIPE_CONTROL::HDC Pipeline Flush: - * - * "When the "Pipeline Select" mode in PIPELINE_SELECT command is - * set to "3D", HDC Pipeline Flush can also flush/invalidate the - * LSC Untyped L1 cache based on the programming of HDC_Chicken0 - * register bits 13:11." 
- * - * "When the 'Pipeline Select' mode is set to 'GPGPU', the LSC - * Untyped L1 cache flush is controlled by 'Untyped Data-Port - * Cache Flush' bit in the PIPE_CONTROL command." - * - * As part of Wa_1608949956 & Wa_14010198302, i915 is programming - * HDC_CHICKEN0[11:13] = 0 ("Untyped L1 is flushed, for both 3D - * Pipecontrol Dataport flush, and UAV coherency barrier event"). - * So there is no need to set "Untyped Data-Port Cache" in 3D - * mode. - */ - pipe.UntypedDataPortCacheFlushEnable = - (bits & ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT) && - current_pipeline == GPGPU; - pipe.HDCPipelineFlushEnable |= pipe.UntypedDataPortCacheFlushEnable; -#endif -#if GFX_VER >= 12 - pipe.TileCacheFlushEnable = bits & ANV_PIPE_TILE_CACHE_FLUSH_BIT; - pipe.HDCPipelineFlushEnable |= bits & ANV_PIPE_HDC_PIPELINE_FLUSH_BIT; -#else /* Flushing HDC pipeline requires DC Flush on earlier HW. */ pipe.DCFlushEnable |= bits & ANV_PIPE_HDC_PIPELINE_FLUSH_BIT; -#endif pipe.DepthCacheFlushEnable = bits & ANV_PIPE_DEPTH_CACHE_FLUSH_BIT; pipe.DCFlushEnable |= bits & ANV_PIPE_DATA_CACHE_FLUSH_BIT; pipe.RenderTargetCacheFlushEnable = @@ -2131,16 +1818,7 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch, /* Wa_1409600907: "PIPE_CONTROL with Depth Stall Enable bit must * be set with any PIPE_CONTROL with Depth Flush Enable bit set. */ -#if GFX_VER >= 12 - pipe.DepthStallEnable = - pipe.DepthCacheFlushEnable || (bits & ANV_PIPE_DEPTH_STALL_BIT); -#else pipe.DepthStallEnable = bits & ANV_PIPE_DEPTH_STALL_BIT; -#endif - -#if GFX_VERx10 >= 125 - pipe.PSSStallSyncEnable = bits & ANV_PIPE_PSS_STALL_SYNC_BIT; -#endif pipe.CommandStreamerStallEnable = bits & ANV_PIPE_CS_STALL_BIT; #if GFX_VER == 8 @@ -2271,31 +1949,11 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch, } if (bits & ANV_PIPE_INVALIDATE_BITS) { - /* From the SKL PRM, Vol. 
2a, "PIPE_CONTROL", - * - * "If the VF Cache Invalidation Enable is set to a 1 in a - * PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields sets to - * 0, with the VF Cache Invalidation Enable set to 0 needs to be sent - * prior to the PIPE_CONTROL with VF Cache Invalidation Enable set to - * a 1." - * - * This appears to hang Broadwell, so we restrict it to just gfx9. - */ - if (GFX_VER == 9 && (bits & ANV_PIPE_VF_CACHE_INVALIDATE_BIT)) - anv_batch_emit(batch, GENX(PIPE_CONTROL), pipe); - anv_batch_emit(batch, GENX(PIPE_CONTROL), pipe) { pipe.StateCacheInvalidationEnable = bits & ANV_PIPE_STATE_CACHE_INVALIDATE_BIT; pipe.ConstantCacheInvalidationEnable = bits & ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT; -#if GFX_VER >= 12 - /* Invalidates the L3 cache part in which index & vertex data is loaded - * when VERTEX_BUFFER_STATE::L3BypassDisable is set. - */ - pipe.L3ReadOnlyCacheInvalidationEnable = - bits & ANV_PIPE_VF_CACHE_INVALIDATE_BIT; -#endif pipe.VFCacheInvalidationEnable = bits & ANV_PIPE_VF_CACHE_INVALIDATE_BIT; pipe.TextureCacheInvalidationEnable = @@ -2303,28 +1961,9 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch, pipe.InstructionCacheInvalidateEnable = bits & ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT; - /* From the SKL PRM, Vol. 2a, "PIPE_CONTROL", - * - * "When VF Cache Invalidate is set “Post Sync Operation” must be - * enabled to “Write Immediate Data” or “Write PS Depth Count” or - * “Write Timestamp”. 
- */ - if (GFX_VER == 9 && pipe.VFCacheInvalidationEnable) { - pipe.PostSyncOperation = WriteImmediateData; - pipe.Address = device->workaround_address; - } anv_debug_dump_pc(pipe); } -#if GFX_VER == 12 - if ((bits & ANV_PIPE_AUX_TABLE_INVALIDATE_BIT) && device->info->has_aux_map) { - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { - lri.RegisterOffset = GENX(GFX_CCS_AUX_INV_num); - lri.DataDWord = 1; - } - } -#endif - bits &= ~ANV_PIPE_INVALIDATE_BITS; } @@ -2346,7 +1985,7 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer) if (trace_flush) trace_intel_begin_stall(&cmd_buffer->trace); - if ((GFX_VER >= 8 && GFX_VER <= 9) && + if (GFX_VER == 8 && (bits & ANV_PIPE_CS_STALL_BIT) && (bits & ANV_PIPE_VF_CACHE_INVALIDATE_BIT)) { /* If we are doing a VF cache invalidate AND a CS stall (it must be @@ -2512,20 +2151,6 @@ cmd_buffer_alloc_push_constants(struct anv_cmd_buffer *cmd_buffer) alloc.ConstantBufferSize = push_constant_kb - kb_used; } -#if GFX_VERx10 == 125 - /* Wa_22011440098 - * - * In 3D mode, after programming push constant alloc command immediately - * program push constant command(ZERO length) without any commit between - * them. - */ - if (intel_device_info_is_dg2(cmd_buffer->device->info)) { - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_ALL), c) { - c.MOCS = anv_mocs(cmd_buffer->device, NULL, 0); - } - } -#endif - cmd_buffer->state.gfx.push_constant_stages = stages; /* From the BDW PRM for 3DSTATE_PUSH_CONSTANT_ALLOC_VS: @@ -3169,9 +2794,7 @@ cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer, * * Let's not bother and assume it's all internal. 
*/ -#if GFX_VER >= 9 - c.MOCS = mocs; -#elif GFX_VER < 8 +#if GFX_VER != 8 c.ConstantBody.MOCS = mocs; #endif @@ -3230,61 +2853,6 @@ cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer, } } -#if GFX_VER >= 12 -static void -cmd_buffer_emit_push_constant_all(struct anv_cmd_buffer *cmd_buffer, - uint32_t shader_mask, - struct anv_address *buffers, - uint32_t buffer_count) -{ - if (buffer_count == 0) { - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_ALL), c) { - c.ShaderUpdateEnable = shader_mask; - c.MOCS = isl_mocs(&cmd_buffer->device->isl_dev, 0, false); - } - return; - } - - const struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx; - const struct anv_graphics_pipeline *pipeline = gfx_state->pipeline; - - static const UNUSED uint32_t push_constant_opcodes[] = { - [MESA_SHADER_VERTEX] = 21, - [MESA_SHADER_TESS_CTRL] = 25, /* HS */ - [MESA_SHADER_TESS_EVAL] = 26, /* DS */ - [MESA_SHADER_GEOMETRY] = 22, - [MESA_SHADER_FRAGMENT] = 23, - }; - - gl_shader_stage stage = vk_to_mesa_shader_stage(shader_mask); - assert(stage < ARRAY_SIZE(push_constant_opcodes)); - - const struct anv_pipeline_bind_map *bind_map = - &pipeline->shaders[stage]->bind_map; - - uint32_t *dw; - const uint32_t buffer_mask = (1 << buffer_count) - 1; - const uint32_t num_dwords = 2 + 2 * buffer_count; - - dw = anv_batch_emitn(&cmd_buffer->batch, num_dwords, - GENX(3DSTATE_CONSTANT_ALL), - .ShaderUpdateEnable = shader_mask, - .PointerBufferMask = buffer_mask, - .MOCS = isl_mocs(&cmd_buffer->device->isl_dev, 0, false)); - - for (int i = 0; i < buffer_count; i++) { - const struct anv_push_range *range = &bind_map->push_ranges[i]; - GENX(3DSTATE_CONSTANT_ALL_DATA_pack)( - &cmd_buffer->batch, dw + 2 + i * 2, - &(struct GENX(3DSTATE_CONSTANT_ALL_DATA)) { - .PointerToConstantBuffer = - anv_address_add(buffers[i], range->start * 32), - .ConstantBufferReadLength = range->length, - }); - } -} -#endif - static void cmd_buffer_flush_push_constants(struct anv_cmd_buffer 
*cmd_buffer, VkShaderStageFlags dirty_stages) @@ -3293,10 +2861,6 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer, struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx; const struct anv_graphics_pipeline *pipeline = gfx_state->pipeline; -#if GFX_VER >= 12 - uint32_t nobuffer_stages = 0; -#endif - /* Compute robust pushed register access mask for each stage. */ if (cmd_buffer->device->robust_buffer_access) { anv_foreach_stage(stage, dirty_stages) { @@ -3373,34 +2937,9 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer, assert(bind_map->push_ranges[i].length == 0); } -#if GFX_VER >= 12 - /* If this stage doesn't have any push constants, emit it later in a - * single CONSTANT_ALL packet. - */ - if (buffer_count == 0) { - nobuffer_stages |= 1 << stage; - continue; - } - - /* The Constant Buffer Read Length field from 3DSTATE_CONSTANT_ALL - * contains only 5 bits, so we can only use it for buffers smaller than - * 32. - */ - if (max_push_range < 32) { - cmd_buffer_emit_push_constant_all(cmd_buffer, 1 << stage, - buffers, buffer_count); - continue; - } -#endif - cmd_buffer_emit_push_constant(cmd_buffer, stage, buffers, buffer_count); } -#if GFX_VER >= 12 - if (nobuffer_stages) - cmd_buffer_emit_push_constant_all(cmd_buffer, nobuffer_stages, NULL, 0); -#endif - cmd_buffer->state.push_constants_dirty &= ~flushed; } @@ -3720,8 +3259,6 @@ genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer) genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->base.l3_config); - genX(cmd_buffer_emit_hashing_mode)(cmd_buffer, UINT_MAX, UINT_MAX, 1); - genX(flush_pipeline_select_3d)(cmd_buffer); /* Apply any pending pipeline flushes we may have. 
We want to apply them @@ -3769,9 +3306,6 @@ genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer) .BufferPitch = stride, .BufferStartingAddress = anv_address_add(buffer->address, offset), .NullVertexBuffer = offset >= buffer->vk.size, -#if GFX_VER >= 12 - .L3BypassDisable = true, -#endif #if GFX_VER >= 8 .BufferSize = size, @@ -3793,7 +3327,7 @@ genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer) }; } -#if GFX_VER >= 8 && GFX_VER <= 9 +#if GFX_VER == 8 genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, vb, state.BufferStartingAddress, state.BufferSize); @@ -3835,12 +3369,7 @@ genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer) for (unsigned idx = 0; idx < MAX_XFB_BUFFERS; idx++) { struct anv_xfb_binding *xfb = &cmd_buffer->state.xfb_bindings[idx]; anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SO_BUFFER), sob) { -#if GFX_VER < 12 sob.SOBufferIndex = idx; -#else - sob._3DCommandOpcode = 0; - sob._3DCommandSubOpcode = SO_BUFFER_INDEX_0_CMD + idx; -#endif if (cmd_buffer->state.xfb_enabled && xfb->buffer && xfb->size != 0) { sob.MOCS = anv_mocs(cmd_buffer->device, xfb->buffer->address.bo, 0); @@ -3865,19 +3394,6 @@ genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer) } } } - - if (intel_device_info_is_dg2(cmd_buffer->device->info)) { - /* Wa_16011411144: also CS_STALL after touching SO_BUFFER change */ - anv_add_pending_pipe_bits(cmd_buffer, - ANV_PIPE_CS_STALL_BIT, - "after SO_BUFFER change WA"); - genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); - } else if (GFX_VER >= 10) { - /* CNL and later require a CS stall after 3DSTATE_SO_BUFFER */ - anv_add_pending_pipe_bits(cmd_buffer, - ANV_PIPE_CS_STALL_BIT, - "after 3DSTATE_SO_BUFFER call"); - } } if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) { @@ -3999,9 +3515,6 @@ emit_vertex_bo(struct anv_cmd_buffer *cmd_buffer, .MOCS = anv_mocs(cmd_buffer->device, addr.bo, ISL_SURF_USAGE_VERTEX_BUFFER_BIT), .NullVertexBuffer = size == 0, -#if GFX_VER >= 
12 - .L3BypassDisable = true, -#endif #if (GFX_VER >= 8) .BufferStartingAddress = addr, .BufferSize = size @@ -5006,7 +4519,6 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) &pipeline->cs, 1); cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; -#if GFX_VERx10 < 125 uint32_t iface_desc_data_dw[GENX(INTERFACE_DESCRIPTOR_DATA_length)]; struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = { .BindingTablePointer = @@ -5028,21 +4540,18 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) mid.InterfaceDescriptorTotalLength = size; mid.InterfaceDescriptorDataStartAddress = state.offset; } -#endif } if (cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_COMPUTE_BIT) { comp_state->push_data = anv_cmd_buffer_cs_push_constants(cmd_buffer); -#if GFX_VERx10 < 125 if (comp_state->push_data.alloc_size) { anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), curbe) { curbe.CURBETotalDataLength = comp_state->push_data.alloc_size; curbe.CURBEDataStartAddress = comp_state->push_data.offset; } } -#endif cmd_buffer->state.push_constants_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; } @@ -5101,56 +4610,6 @@ void genX(CmdDispatch)( genX(CmdDispatchBase)(commandBuffer, 0, 0, 0, x, y, z); } -#if GFX_VERx10 >= 125 - -static inline void -emit_compute_walker(struct anv_cmd_buffer *cmd_buffer, - const struct anv_compute_pipeline *pipeline, bool indirect, - const struct brw_cs_prog_data *prog_data, - uint32_t groupCountX, uint32_t groupCountY, - uint32_t groupCountZ) -{ - struct anv_cmd_compute_state *comp_state = &cmd_buffer->state.compute; - const struct anv_shader_bin *cs_bin = pipeline->cs; - bool predicate = cmd_buffer->state.conditional_render_enabled; - - const struct intel_device_info *devinfo = pipeline->base.device->info; - const struct brw_cs_dispatch_info dispatch = - brw_cs_get_dispatch_info(devinfo, prog_data, NULL); - - anv_batch_emit(&cmd_buffer->batch, GENX(COMPUTE_WALKER), cw) { - cw.IndirectParameterEnable = 
indirect; - cw.PredicateEnable = predicate; - cw.SIMDSize = dispatch.simd_size / 16; - cw.IndirectDataStartAddress = comp_state->push_data.offset; - cw.IndirectDataLength = comp_state->push_data.alloc_size; - cw.LocalXMaximum = prog_data->local_size[0] - 1; - cw.LocalYMaximum = prog_data->local_size[1] - 1; - cw.LocalZMaximum = prog_data->local_size[2] - 1; - cw.ThreadGroupIDXDimension = groupCountX; - cw.ThreadGroupIDYDimension = groupCountY; - cw.ThreadGroupIDZDimension = groupCountZ; - cw.ExecutionMask = dispatch.right_mask; - cw.PostSync.MOCS = anv_mocs(pipeline->base.device, NULL, 0); - - cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) { - .KernelStartPointer = cs_bin->kernel.offset, - .SamplerStatePointer = - cmd_buffer->state.samplers[MESA_SHADER_COMPUTE].offset, - .BindingTablePointer = - cmd_buffer->state.binding_tables[MESA_SHADER_COMPUTE].offset, - .BindingTableEntryCount = - 1 + MIN2(pipeline->cs->bind_map.surface_count, 30), - .NumberofThreadsinGPGPUThreadGroup = dispatch.threads, - .SharedLocalMemorySize = encode_slm_size(GFX_VER, - prog_data->base.total_shared), - .NumberOfBarriers = prog_data->uses_barrier, - }; - } -} - -#else /* #if GFX_VERx10 >= 125 */ - static inline void emit_gpgpu_walker(struct anv_cmd_buffer *cmd_buffer, const struct anv_compute_pipeline *pipeline, bool indirect, @@ -5182,8 +4641,6 @@ emit_gpgpu_walker(struct anv_cmd_buffer *cmd_buffer, anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH), msf); } -#endif /* #if GFX_VERx10 >= 125 */ - static inline void emit_cs_walker(struct anv_cmd_buffer *cmd_buffer, const struct anv_compute_pipeline *pipeline, bool indirect, @@ -5191,13 +4648,8 @@ emit_cs_walker(struct anv_cmd_buffer *cmd_buffer, uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) { -#if GFX_VERx10 >= 125 - emit_compute_walker(cmd_buffer, pipeline, indirect, prog_data, groupCountX, - groupCountY, groupCountZ); -#else emit_gpgpu_walker(cmd_buffer, pipeline, indirect, prog_data, 
groupCountX, groupCountY, groupCountZ); -#endif } void genX(CmdDispatchBase)( @@ -5365,47 +4817,6 @@ void genX(CmdDispatchIndirect)( trace_intel_end_compute(&cmd_buffer->trace, 0, 0, 0); } -struct anv_state -genX(cmd_buffer_ray_query_globals)(struct anv_cmd_buffer *cmd_buffer) -{ -#if GFX_VERx10 >= 125 - struct anv_device *device = cmd_buffer->device; - - struct anv_state state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - BRW_RT_DISPATCH_GLOBALS_SIZE, - 64); - struct brw_rt_scratch_layout layout; - uint32_t stack_ids_per_dss = 2048; /* TODO: can we use a lower value in - * some cases? - */ - brw_rt_compute_scratch_layout(&layout, device->info, - stack_ids_per_dss, 1 << 10); - - struct GFX_RT_DISPATCH_GLOBALS rtdg = { - .MemBaseAddress = (struct anv_address) { - /* The ray query HW computes offsets from the top of the buffer, so - * let the address at the end of the buffer. - */ - .bo = device->ray_query_bo, - .offset = device->ray_query_bo->size - }, - .AsyncRTStackSize = layout.ray_stack_stride / 64, - .NumDSSRTStacks = layout.stack_ids_per_dss, - .MaxBVHLevels = BRW_RT_MAX_BVH_LEVELS, - .Flags = RT_DEPTH_TEST_LESS_EQUAL, - .ResumeShaderTable = (struct anv_address) { - .bo = cmd_buffer->state.ray_query_shadow_bo, - }, - }; - GFX_RT_DISPATCH_GLOBALS_pack(NULL, state.map, &rtdg); - - return state; -#else - unreachable("Not supported"); -#endif -} - static void genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer, uint32_t pipeline) @@ -5415,7 +4826,7 @@ genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer, if (cmd_buffer->state.current_pipeline == pipeline) return; -#if GFX_VER >= 8 && GFX_VER < 10 +#if GFX_VER >= 8 /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT: * * Software must clear the COLOR_CALC_STATE Valid field in @@ -5429,30 +4840,6 @@ genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer, anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), t); #endif -#if GFX_VER == 9 - if (pipeline == 
_3D) { - /* There is a mid-object preemption workaround which requires you to - * re-emit MEDIA_VFE_STATE after switching from GPGPU to 3D. However, - * even without preemption, we have issues with geometry flickering when - * GPGPU and 3D are back-to-back and this seems to fix it. We don't - * really know why. - */ - anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_VFE_STATE), vfe) { - vfe.MaximumNumberofThreads = - devinfo->max_cs_threads * devinfo->subslice_total - 1; - vfe.NumberofURBEntries = 2; - vfe.URBEntryAllocationSize = 2; - } - - /* We just emitted a dummy MEDIA_VFE_STATE so now that packet is - * invalid. Set the compute pipeline to dirty to force a re-emit of the - * pipeline in case we get back-to-back dispatch calls with the same - * pipeline and a PIPELINE_SELECT in between. - */ - cmd_buffer->state.compute.pipeline_dirty = true; - } -#endif - /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction] * PIPELINE_SELECT [DevBWR+]": * @@ -5480,30 +4867,9 @@ genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer, genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), ps) { -#if GFX_VER >= 9 - ps.MaskBits = GFX_VER >= 12 ? 0x13 : 3; - ps.MediaSamplerDOPClockGateEnable = GFX_VER >= 12; -#endif ps.PipelineSelection = pipeline; } -#if GFX_VER == 9 - if (devinfo->platform == INTEL_PLATFORM_GLK) { - /* Project: DevGLK - * - * "This chicken bit works around a hardware issue with barrier logic - * encountered when switching between GPGPU and 3D pipelines. To - * workaround the issue, this mode bit should be set after a pipeline - * is selected." - */ - anv_batch_write_reg(&cmd_buffer->batch, GENX(SLICE_COMMON_ECO_CHICKEN1), scec1) { - scec1.GLKBarrierMode = pipeline == GPGPU ? 
GLK_BARRIER_MODE_GPGPU - : GLK_BARRIER_MODE_3D_HULL; - scec1.GLKBarrierModeMask = 1; - } - } -#endif - cmd_buffer->state.current_pipeline = pipeline; } @@ -5543,9 +4909,6 @@ genX(cmd_buffer_emit_gfx7_depth_flush)(struct anv_cmd_buffer *cmd_buffer) } anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) { pipe.DepthCacheFlushEnable = true; -#if GFX_VER >= 12 - pipe.TileCacheFlushEnable = true; -#endif anv_debug_dump_pc(pipe); } anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) { @@ -5554,54 +4917,6 @@ genX(cmd_buffer_emit_gfx7_depth_flush)(struct anv_cmd_buffer *cmd_buffer) } } -void -genX(cmd_buffer_emit_gfx12_depth_wa)(struct anv_cmd_buffer *cmd_buffer, - const struct isl_surf *surf) -{ -#if GFX_VERx10 == 120 - const bool is_d16_1x_msaa = surf->format == ISL_FORMAT_R16_UNORM && - surf->samples == 1; - - switch (cmd_buffer->state.depth_reg_mode) { - case ANV_DEPTH_REG_MODE_HW_DEFAULT: - if (!is_d16_1x_msaa) - return; - break; - case ANV_DEPTH_REG_MODE_D16_1X_MSAA: - if (is_d16_1x_msaa) - return; - break; - case ANV_DEPTH_REG_MODE_UNKNOWN: - break; - } - - /* We'll change some CHICKEN registers depending on the depth surface - * format. Do a depth flush and stall so the pipeline is not using these - * settings while we change the registers. - */ - anv_add_pending_pipe_bits(cmd_buffer, - ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | - ANV_PIPE_DEPTH_STALL_BIT | - ANV_PIPE_END_OF_PIPE_SYNC_BIT, - "Workaround: Stop pipeline for 14010455700"); - genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); - - /* Wa_14010455700 - * - * To avoid sporadic corruptions “Set 0x7010[9] when Depth Buffer - * Surface Format is D16_UNORM , surface type is not NULL & 1X_MSAA”. - */ - anv_batch_write_reg(&cmd_buffer->batch, GENX(COMMON_SLICE_CHICKEN1), reg) { - reg.HIZPlaneOptimizationdisablebit = is_d16_1x_msaa; - reg.HIZPlaneOptimizationdisablebitMask = true; - } - - cmd_buffer->state.depth_reg_mode = - is_d16_1x_msaa ? 
ANV_DEPTH_REG_MODE_D16_1X_MSAA : - ANV_DEPTH_REG_MODE_HW_DEFAULT; -#endif -} - /* From the Skylake PRM, 3DSTATE_VERTEX_BUFFERS: * * "The VF cache needs to be invalidated before binding and then using @@ -5635,8 +4950,7 @@ genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(struct anv_cmd_buffer *cmd_buffer struct anv_address vb_address, uint32_t vb_size) { - if (GFX_VER < 8 || GFX_VER > 9 || - anv_use_relocations(cmd_buffer->device->physical)) + if (GFX_VER < 8 || anv_use_relocations(cmd_buffer->device->physical)) return; struct anv_vb_cache_range *bound, *dirty; @@ -5666,8 +4980,7 @@ genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush)(struct anv_cmd_buffer *cmd_b uint32_t access_type, uint64_t vb_used) { - if (GFX_VER < 8 || GFX_VER > 9 || - anv_use_relocations(cmd_buffer->device->physical)) + if (GFX_VER < 8 || anv_use_relocations(cmd_buffer->device->physical)) return; if (access_type == RANDOM) { @@ -5699,97 +5012,6 @@ genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush)(struct anv_cmd_buffer *cmd_b } } -/** - * Update the pixel hashing modes that determine the balancing of PS threads - * across subslices and slices. - * - * \param width Width bound of the rendering area (already scaled down if \p - * scale is greater than 1). - * \param height Height bound of the rendering area (already scaled down if \p - * scale is greater than 1). - * \param scale The number of framebuffer samples that could potentially be - * affected by an individual channel of the PS thread. This is - * typically one for single-sampled rendering, but for operations - * like CCS resolves and fast clears a single PS invocation may - * update a huge number of pixels, in which case a finer - * balancing is desirable in order to maximally utilize the - * bandwidth available. UINT_MAX can be used as shorthand for - * "finest hashing mode available". 
- */ -void -genX(cmd_buffer_emit_hashing_mode)(struct anv_cmd_buffer *cmd_buffer, - unsigned width, unsigned height, - unsigned scale) -{ -#if GFX_VER == 9 - const struct intel_device_info *devinfo = cmd_buffer->device->info; - const unsigned slice_hashing[] = { - /* Because all Gfx9 platforms with more than one slice require - * three-way subslice hashing, a single "normal" 16x16 slice hashing - * block is guaranteed to suffer from substantial imbalance, with one - * subslice receiving twice as much work as the other two in the - * slice. - * - * The performance impact of that would be particularly severe when - * three-way hashing is also in use for slice balancing (which is the - * case for all Gfx9 GT4 platforms), because one of the slices - * receives one every three 16x16 blocks in either direction, which - * is roughly the periodicity of the underlying subslice imbalance - * pattern ("roughly" because in reality the hardware's - * implementation of three-way hashing doesn't do exact modulo 3 - * arithmetic, which somewhat decreases the magnitude of this effect - * in practice). This leads to a systematic subslice imbalance - * within that slice regardless of the size of the primitive. The - * 32x32 hashing mode guarantees that the subslice imbalance within a - * single slice hashing block is minimal, largely eliminating this - * effect. - */ - _32x32, - /* Finest slice hashing mode available. */ - NORMAL - }; - const unsigned subslice_hashing[] = { - /* 16x16 would provide a slight cache locality benefit especially - * visible in the sampler L1 cache efficiency of low-bandwidth - * non-LLC platforms, but it comes at the cost of greater subslice - * imbalance for primitives of dimensions approximately intermediate - * between 16x4 and 16x16. - */ - _16x4, - /* Finest subslice hashing mode available. */ - _8x4 - }; - /* Dimensions of the smallest hashing block of a given hashing mode. 
If - * the rendering area is smaller than this there can't possibly be any - * benefit from switching to this mode, so we optimize out the - * transition. - */ - const unsigned min_size[][2] = { - { 16, 4 }, - { 8, 4 } - }; - const unsigned idx = scale > 1; - - if (cmd_buffer->state.current_hash_scale != scale && - (width > min_size[idx][0] || height > min_size[idx][1])) { - anv_add_pending_pipe_bits(cmd_buffer, - ANV_PIPE_CS_STALL_BIT | - ANV_PIPE_STALL_AT_SCOREBOARD_BIT, - "change pixel hash mode"); - genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); - - anv_batch_write_reg(&cmd_buffer->batch, GENX(GT_MODE), gt) { - gt.SliceHashing = (devinfo->num_slices > 1 ? slice_hashing[idx] : 0); - gt.SliceHashingMask = (devinfo->num_slices > 1 ? -1 : 0); - gt.SubsliceHashing = subslice_hashing[idx]; - gt.SubsliceHashingMask = -1; - } - - cmd_buffer->state.current_hash_scale = scale; - } -#endif -} - static void cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { @@ -5891,65 +5113,9 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) isl_emit_depth_stencil_hiz_s(&device->isl_dev, dw, &info); - if (info.depth_surf) - genX(cmd_buffer_emit_gfx12_depth_wa)(cmd_buffer, info.depth_surf); - - if (GFX_VER >= 12) { - cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT; - genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); - - /* Wa_1408224581 - * - * Workaround: Gfx12LP Astep only An additional pipe control with - * post-sync = store dword operation would be required.( w/a is to - * have an additional pipe control after the stencil state whenever - * the surface state bits of this state is changing). - * - * This also seems sufficient to handle Wa_14014148106. 
- */ - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { - pc.PostSyncOperation = WriteImmediateData; - pc.Address = cmd_buffer->device->workaround_address; - } - } cmd_buffer->state.hiz_enabled = isl_aux_usage_has_hiz(info.hiz_usage); } -static void -cmd_buffer_emit_cps_control_buffer(struct anv_cmd_buffer *cmd_buffer, - const struct anv_image_view *fsr_iview) -{ -#if GFX_VERx10 >= 125 - struct anv_device *device = cmd_buffer->device; - - if (!device->vk.enabled_extensions.KHR_fragment_shading_rate) - return; - - uint32_t *dw = anv_batch_emit_dwords(&cmd_buffer->batch, - device->isl_dev.cpb.size / 4); - if (dw == NULL) - return; - - struct isl_cpb_emit_info info = { }; - - if (fsr_iview) { - info.view = &fsr_iview->planes[0].isl; - info.surf = &fsr_iview->image->planes[0].primary_surface.isl; - info.address = - anv_batch_emit_reloc(&cmd_buffer->batch, - dw + device->isl_dev.cpb.offset / 4, - fsr_iview->image->bindings[0].address.bo, - fsr_iview->image->bindings[0].address.offset + - fsr_iview->image->bindings[0].memory_range.offset); - info.mocs = - anv_mocs(device, fsr_iview->image->bindings[0].address.bo, - ISL_SURF_USAGE_CPB_BIT); - } - - isl_emit_cpb_control_s(&device->isl_dev, dw, &info); -#endif /* GFX_VERx10 >= 125 */ -} - static VkImageLayout attachment_initial_layout(const VkRenderingAttachmentInfo *att) { @@ -6178,8 +5344,7 @@ void genX(CmdBeginRendering)( add_surface_state_relocs(cmd_buffer, gfx->color_att[i].surface_state); - if (GFX_VER < 10 && - (att->loadOp == VK_ATTACHMENT_LOAD_OP_LOAD || + if ((att->loadOp == VK_ATTACHMENT_LOAD_OP_LOAD || (gfx->rendering_flags & VK_RENDERING_RESUMING_BIT)) && iview->image->planes[0].aux_usage != ISL_AUX_USAGE_NONE && iview->planes[0].isl.base_level == 0 && @@ -6199,15 +5364,6 @@ void genX(CmdBeginRendering)( } } - const struct anv_image_view *fsr_iview = NULL; - const VkRenderingFragmentShadingRateAttachmentInfoKHR *fsr_att = - vk_find_struct_const(pRenderingInfo->pNext, - 
RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR); - if (fsr_att != NULL && fsr_att->imageView != VK_NULL_HANDLE) { - fsr_iview = anv_image_view_from_handle(fsr_att->imageView); - /* imageLayout and shadingRateAttachmentTexelSize are ignored */ - } - const struct anv_image_view *ds_iview = NULL; const VkRenderingAttachmentInfo *d_att = pRenderingInfo->pDepthAttachment; const VkRenderingAttachmentInfo *s_att = pRenderingInfo->pStencilAttachment; @@ -6448,24 +5604,7 @@ void genX(CmdBeginRendering)( */ gfx->dirty |= ANV_CMD_DIRTY_PIPELINE; -#if GFX_VER >= 11 - /* The PIPE_CONTROL command description says: - * - * "Whenever a Binding Table Index (BTI) used by a Render Target Message - * points to a different RENDER_SURFACE_STATE, SW must issue a Render - * Target Cache Flush by enabling this bit. When render target flush - * is set due to new association of BTI, PS Scoreboard Stall bit must - * be set in this packet." - */ - anv_add_pending_pipe_bits(cmd_buffer, - ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | - ANV_PIPE_STALL_AT_SCOREBOARD_BIT, - "change RT"); -#endif - cmd_buffer_emit_depth_stencil(cmd_buffer); - - cmd_buffer_emit_cps_control_buffer(cmd_buffer, fsr_iview); } static void @@ -6950,21 +6089,12 @@ VkResult genX(CmdSetPerformanceOverrideINTEL)( switch (pOverrideInfo->type) { case VK_PERFORMANCE_OVERRIDE_TYPE_NULL_HARDWARE_INTEL: { -#if GFX_VER >= 9 - anv_batch_write_reg(&cmd_buffer->batch, GENX(CS_DEBUG_MODE2), csdm2) { - csdm2._3DRenderingInstructionDisable = pOverrideInfo->enable; - csdm2.MediaInstructionDisable = pOverrideInfo->enable; - csdm2._3DRenderingInstructionDisableMask = true; - csdm2.MediaInstructionDisableMask = true; - } -#else anv_batch_write_reg(&cmd_buffer->batch, GENX(INSTPM), instpm) { instpm._3DRenderingInstructionDisable = pOverrideInfo->enable; instpm.MediaInstructionDisable = pOverrideInfo->enable; instpm._3DRenderingInstructionDisableMask = true; instpm.MediaInstructionDisableMask = true; } -#endif break; } diff --git 
a/src/intel/vulkan_hasvk/gfx8_cmd_buffer.c b/src/intel/vulkan_hasvk/gfx8_cmd_buffer.c index 8972a0c73fd..afd29bad80c 100644 --- a/src/intel/vulkan_hasvk/gfx8_cmd_buffer.c +++ b/src/intel/vulkan_hasvk/gfx8_cmd_buffer.c @@ -53,29 +53,8 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable) pc.DepthCacheFlushEnable = true; pc.CommandStreamerStallEnable = true; pc.RenderTargetCacheFlushEnable = true; -#if GFX_VER >= 12 - pc.TileCacheFlushEnable = true; - - /* Wa_1409600907: "PIPE_CONTROL with Depth Stall Enable bit must - * be set with any PIPE_CONTROL with Depth Flush Enable bit set. - */ - pc.DepthStallEnable = true; -#endif } -#if GFX_VER == 9 - - uint32_t cache_mode; - anv_pack_struct(&cache_mode, GENX(CACHE_MODE_0), - .STCPMAOptimizationEnable = enable, - .STCPMAOptimizationEnableMask = true); - anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) { - lri.RegisterOffset = GENX(CACHE_MODE_0_num); - lri.DataDWord = cache_mode; - } - -#elif GFX_VER == 8 - uint32_t cache_mode; anv_pack_struct(&cache_mode, GENX(CACHE_MODE_1), .NPPMAFixEnable = enable, @@ -87,8 +66,6 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable) lri.DataDWord = cache_mode; } -#endif /* GFX_VER == 8 */ - /* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache * Flush bits is often necessary. We do it regardless because it's easier. * The render cache flush is also necessary if stencil writes are enabled. 
@@ -100,9 +77,6 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable) pc.DepthStallEnable = true; pc.DepthCacheFlushEnable = true; pc.RenderTargetCacheFlushEnable = true; -#if GFX_VER >= 12 - pc.TileCacheFlushEnable = true; -#endif } } @@ -196,122 +170,6 @@ want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer, wm_prog_data->computed_depth_mode != PSCDEPTH_OFF; } -UNUSED static bool -want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer, - const struct vk_depth_stencil_state *ds) -{ - if (GFX_VER > 9) - return false; - assert(GFX_VER == 9); - - /* From the Skylake PRM Vol. 2c CACHE_MODE_1::STC PMA Optimization Enable: - * - * Clearing this bit will force the STC cache to wait for pending - * retirement of pixels at the HZ-read stage and do the STC-test for - * Non-promoted, R-computed and Computed depth modes instead of - * postponing the STC-test to RCPFE. - * - * STC_TEST_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE && - * 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable - * - * STC_WRITE_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE && - * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable && - * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE) - * - * COMP_STC_EN = STC_TEST_EN && - * 3DSTATE_PS_EXTRA::PixelShaderComputesStencil - * - * SW parses the pipeline states to generate the following logical - * signal indicating if PMA FIX can be enabled. 
- * - * STC_PMA_OPT = - * 3DSTATE_WM::ForceThreadDispatch != 1 && - * !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) && - * 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL && - * 3DSTATE_DEPTH_BUFFER::HIZ Enable && - * !(3DSTATE_WM::EDSC_Mode == 2) && - * 3DSTATE_PS_EXTRA::PixelShaderValid && - * !(3DSTATE_WM_HZ_OP::DepthBufferClear || - * 3DSTATE_WM_HZ_OP::DepthBufferResolve || - * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable || - * 3DSTATE_WM_HZ_OP::StencilBufferClear) && - * (COMP_STC_EN || STC_WRITE_EN) && - * ((3DSTATE_PS_EXTRA::PixelShaderKillsPixels || - * 3DSTATE_WM::ForceKillPix == ON || - * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget || - * 3DSTATE_PS_BLEND::AlphaToCoverageEnable || - * 3DSTATE_PS_BLEND::AlphaTestEnable || - * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) || - * (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF)) - */ - - /* These are always true: - * 3DSTATE_WM::ForceThreadDispatch != 1 && - * !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) - */ - - /* We only enable the PMA fix if we know for certain that HiZ is enabled. - * If we don't know whether HiZ is enabled or not, we disable the PMA fix - * and there is no harm. 
- * - * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) && - * 3DSTATE_DEPTH_BUFFER::HIZ Enable - */ - if (!cmd_buffer->state.hiz_enabled) - return false; - - /* We can't possibly know if HiZ is enabled without the depth attachment */ - ASSERTED const struct anv_image_view *d_iview = - cmd_buffer->state.gfx.depth_att.iview; - assert(d_iview && d_iview->image->planes[0].aux_usage == ISL_AUX_USAGE_HIZ); - - /* 3DSTATE_PS_EXTRA::PixelShaderValid */ - struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; - if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) - return false; - - /* !(3DSTATE_WM::EDSC_Mode == 2) */ - const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); - if (wm_prog_data->early_fragment_tests) - return false; - - /* We never use anv_pipeline for HiZ ops so this is trivially true: - * !(3DSTATE_WM_HZ_OP::DepthBufferClear || - * 3DSTATE_WM_HZ_OP::DepthBufferResolve || - * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable || - * 3DSTATE_WM_HZ_OP::StencilBufferClear) - */ - - /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE && - * 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable - */ - const bool stc_test_en = ds->stencil.test_enable; - - /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE && - * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable && - * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE) - */ - const bool stc_write_en = ds->stencil.write_enable; - - /* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */ - const bool comp_stc_en = stc_test_en && wm_prog_data->computed_stencil; - - /* COMP_STC_EN || STC_WRITE_EN */ - if (!(comp_stc_en || stc_write_en)) - return false; - - /* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels || - * 3DSTATE_WM::ForceKillPix == ON || - * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget || - * 3DSTATE_PS_BLEND::AlphaToCoverageEnable || - * 3DSTATE_PS_BLEND::AlphaTestEnable || - * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) || - * (3DSTATE_PS_EXTRA::Pixel Shader Computed 
Depth mode != PSCDEPTH_OFF) - */ - return pipeline->kill_pixel || - wm_prog_data->computed_depth_mode != PSCDEPTH_OFF; -} - void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) { @@ -319,27 +177,17 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) const struct vk_dynamic_graphics_state *dyn = &cmd_buffer->vk.dynamic_graphics_state; -#if GFX_VER >= 11 - if (cmd_buffer->device->vk.enabled_extensions.KHR_fragment_shading_rate && - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR)) - genX(emit_shading_rate)(&cmd_buffer->batch, pipeline, &dyn->fsr); -#endif /* GFX_VER >= 11 */ - if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH)) { uint32_t sf_dw[GENX(3DSTATE_SF_length)]; struct GENX(3DSTATE_SF) sf = { GENX(3DSTATE_SF_header), }; -#if GFX_VER == 8 if (cmd_buffer->device->info->platform == INTEL_PLATFORM_CHV) { sf.CHVLineWidth = dyn->rs.line.width; } else { sf.LineWidth = dyn->rs.line.width; } -#else - sf.LineWidth = dyn->rs.line.width, -#endif GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf); anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gfx8.sf); } @@ -394,7 +242,6 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) * across different state packets for gfx8 and gfx9. We handle that by * using a big old #if switch here. 
*/ -#if GFX_VER == 8 if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) { struct anv_state cc_state = @@ -462,87 +309,6 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) const bool pma = want_depth_pma_fix(cmd_buffer, &opt_ds); genX(cmd_buffer_enable_pma_fix)(cmd_buffer, pma); } -#else - if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) { - struct anv_state cc_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - GENX(COLOR_CALC_STATE_length) * 4, - 64); - struct GENX(COLOR_CALC_STATE) cc = { - .BlendConstantColorRed = dyn->cb.blend_constants[0], - .BlendConstantColorGreen = dyn->cb.blend_constants[1], - .BlendConstantColorBlue = dyn->cb.blend_constants[2], - .BlendConstantColorAlpha = dyn->cb.blend_constants[3], - }; - GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc); - - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) { - ccp.ColorCalcStatePointer = cc_state.offset; - ccp.ColorCalcStatePointerValid = true; - } - } - - if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE | - ANV_CMD_DIRTY_RENDER_TARGETS)) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE)) { - VkImageAspectFlags ds_aspects = 0; - if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED) - ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; - if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED) - ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; - - 
struct vk_depth_stencil_state opt_ds = dyn->ds; - vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true); - - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) { - ds.DoubleSidedStencilEnable = true; - - ds.StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff; - ds.StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff; - - ds.BackfaceStencilTestMask = opt_ds.stencil.back.compare_mask & 0xff; - ds.BackfaceStencilWriteMask = opt_ds.stencil.back.write_mask & 0xff; - - ds.StencilReferenceValue = opt_ds.stencil.front.reference & 0xff; - ds.BackfaceStencilReferenceValue = opt_ds.stencil.back.reference & 0xff; - - ds.DepthTestEnable = opt_ds.depth.test_enable; - ds.DepthBufferWriteEnable = opt_ds.depth.write_enable; - ds.DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op]; - ds.StencilTestEnable = opt_ds.stencil.test_enable; - ds.StencilBufferWriteEnable = opt_ds.stencil.write_enable; - ds.StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail]; - ds.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass]; - ds.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail]; - ds.StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare]; - ds.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail]; - ds.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass]; - ds.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail]; - ds.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare]; - } - - const bool pma = want_stencil_pma_fix(cmd_buffer, &opt_ds); - genX(cmd_buffer_enable_pma_fix)(cmd_buffer, pma); - } -#endif - -#if GFX_VER >= 12 - if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_BOUNDS)) { - 
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BOUNDS), db) { - db.DepthBoundsTestEnable = dyn->ds.depth.bounds_test.enable; - db.DepthBoundsTestMinValue = dyn->ds.depth.bounds_test.min; - db.DepthBoundsTestMaxValue = dyn->ds.depth.bounds_test.max; - } - } -#endif if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE)) { anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_LINE_STIPPLE), ls) { @@ -557,9 +323,6 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) ANV_CMD_DIRTY_INDEX_BUFFER)) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) { anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) { -#if GFX_VERx10 >= 125 - vf.GeometryDistributionEnable = true; -#endif vf.IndexedDrawCutIndexEnable = dyn->ia.primitive_restart_enable; vf.CutIndex = cmd_buffer->state.gfx.restart_index; } @@ -573,46 +336,12 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) ib.MOCS = anv_mocs(cmd_buffer->device, buffer->address.bo, ISL_SURF_USAGE_INDEX_BUFFER_BIT); -#if GFX_VER >= 12 - ib.L3BypassDisable = true; -#endif ib.BufferStartingAddress = anv_address_add(buffer->address, offset); ib.BufferSize = vk_buffer_range(&buffer->vk, offset, VK_WHOLE_SIZE); } } -#if GFX_VERx10 >= 125 - if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) { - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VFG), vfg) { - /* If 3DSTATE_TE: TE Enable == 1 then RR_STRICT else RR_FREE*/ - vfg.DistributionMode = - anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL) ? 
RR_STRICT : - RR_FREE; - vfg.DistributionGranularity = BatchLevelGranularity; - /* Wa_14014890652 */ - if (intel_device_info_is_dg2(cmd_buffer->device->info)) - vfg.GranularityThresholdDisable = 1; - vfg.ListCutIndexEnable = dyn->ia.primitive_restart_enable; - /* 192 vertices for TRILIST_ADJ */ - vfg.ListNBatchSizeScale = 0; - /* Batch size of 384 vertices */ - vfg.List3BatchSizeScale = 2; - /* Batch size of 128 vertices */ - vfg.List2BatchSizeScale = 1; - /* Batch size of 128 vertices */ - vfg.List1BatchSizeScale = 2; - /* Batch size of 256 vertices for STRIP topologies */ - vfg.StripBatchSizeScale = 3; - /* 192 control points for PATCHLIST_3 */ - vfg.PatchBatchSizeScale = 1; - /* 192 control points for PATCHLIST_3 */ - vfg.PatchBatchSizeMultiplier = 31; - } - } -#endif - if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations && BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS)) genX(emit_sample_pattern)(&cmd_buffer->batch, dyn->ms.sample_locations); |