diff options
author | Rhys Perry <pendingchaos02@gmail.com> | 2021-03-10 10:42:15 +0000 |
---|---|---|
committer | Eric Engestrom <eric@engestrom.ch> | 2021-04-18 22:13:19 +0200 |
commit | d90e5fe88aef9c5941ef7af0d86286737d67ae59 (patch) | |
tree | 1b091820e10171b840d00020a8cb6a70a84dc671 | |
parent | da74e86cb7346411847fcc85e66c456bd6a7417c (diff) |
radv: fix barrier in radv_decompress_dcc_compute shader
ACO doesn't create a waitcnt for barriers between texture samples and
image stores because texture samples are supposed to use read-only
memory. It could also schedule the barrier above the texture sample.
We also have to use a larger memory scope to avoid an ACO optimization.
Tested on GFX8 with Sascha Willems' deferred sample, with some DCC
decompressions and the compute path forced.
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Cc: 21.1 <mesa-stable>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9496>
(cherry picked from commit ec70882238683a95bc93387a12c2439488e2c084)
-rw-r--r-- | .pick_status.json | 2 | ||||
-rw-r--r-- | src/amd/vulkan/radv_image.c | 12 | ||||
-rw-r--r-- | src/amd/vulkan/radv_meta_fast_clear.c | 44 | ||||
-rw-r--r-- | src/amd/vulkan/radv_private.h | 1 |
4 files changed, 26 insertions, 33 deletions
diff --git a/.pick_status.json b/.pick_status.json index 6fccacaa724..d3169815b4d 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1066,7 +1066,7 @@ "description": "radv: fix barrier in radv_decompress_dcc_compute shader", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "master_sha": null, "because_sha": null }, diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index 7e473156c81..51259b0cc0a 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -1658,7 +1658,8 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_ static void radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_device *device, VkFormat vk_format, const VkComponentMapping *components, - bool is_storage_image, bool disable_compression, unsigned plane_id, + bool is_storage_image, bool disable_compression, + bool enable_compression, unsigned plane_id, unsigned descriptor_plane_id) { struct radv_image *image = iview->image; @@ -1699,7 +1700,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic } bool enable_write_compression = radv_image_use_dcc_image_stores(device, image); - if (is_storage_image && !enable_write_compression) + if (is_storage_image && !(enable_write_compression || enable_compression)) disable_compression = true; si_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, iview->base_mip, iview->base_mip, blk_w, is_stencil, is_storage_image, @@ -1898,13 +1899,16 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device, iview->support_fast_clear = radv_image_view_can_fast_clear(device, iview); bool disable_compression = extra_create_info ? extra_create_info->disable_compression : false; + bool enable_compression = extra_create_info ? extra_create_info->enable_compression : false; for (unsigned i = 0; i < (iview->multiple_planes ? 
vk_format_get_plane_count(image->vk_format) : 1); ++i) { VkFormat format = vk_format_get_plane_format(iview->vk_format, i); radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, false, - disable_compression, iview->plane_id + i, i); + disable_compression, enable_compression, iview->plane_id + i, + i); radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, true, - disable_compression, iview->plane_id + i, i); + disable_compression, enable_compression, iview->plane_id + i, + i); } } diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c index 9a62664fc95..b7680036f40 100644 --- a/src/amd/vulkan/radv_meta_fast_clear.c +++ b/src/amd/vulkan/radv_meta_fast_clear.c @@ -31,8 +31,6 @@ static nir_shader * build_dcc_decompress_compute_shader(struct radv_device *dev) { - const struct glsl_type *buf_type = - glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT); const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT); nir_builder b = @@ -42,7 +40,7 @@ build_dcc_decompress_compute_shader(struct radv_device *dev) b.shader->info.cs.local_size[0] = 16; b.shader->info.cs.local_size[1] = 16; b.shader->info.cs.local_size[2] = 1; - nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex"); + nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "in_img"); input_img->data.descriptor_set = 0; input_img->data.binding = 0; @@ -57,30 +55,20 @@ build_dcc_decompress_compute_shader(struct radv_device *dev) b.shader->info.cs.local_size[2], 0); nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); - nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa; - - nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3); - tex->sampler_dim = GLSL_SAMPLER_DIM_2D; - tex->op = nir_texop_txf; - tex->src[0].src_type = nir_tex_src_coord; - tex->src[0].src 
= nir_src_for_ssa(nir_channels(&b, global_id, 3)); - tex->src[1].src_type = nir_tex_src_lod; - tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); - tex->src[2].src_type = nir_tex_src_texture_deref; - tex->src[2].src = nir_src_for_ssa(input_img_deref); - tex->dest_type = nir_type_float32; - tex->is_array = false; - tex->coord_components = 2; - - nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); - nir_builder_instr_insert(&b, &tex->instr); - - nir_scoped_barrier(&b, .execution_scope = NIR_SCOPE_WORKGROUP, - .memory_scope = NIR_SCOPE_WORKGROUP, .memory_semantics = NIR_MEMORY_ACQ_REL, - .memory_modes = nir_var_mem_ssbo); + + nir_ssa_def *data = + nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, global_id, + nir_ssa_undef(&b, 1, 32), nir_imm_int(&b, 0)); + + /* We need a NIR_SCOPE_DEVICE memory_scope because ACO will avoid + * creating a vmcnt(0) because it expects the L1 cache to keep memory + * operations in-order for the same workgroup. The vmcnt(0) seems + * necessary however. 
*/ + nir_scoped_barrier(&b, .execution_scope = NIR_SCOPE_WORKGROUP, .memory_scope = NIR_SCOPE_DEVICE, + .memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo); nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id, - nir_ssa_undef(&b, 1, 32), &tex->dest.ssa, nir_imm_int(&b, 0)); + nir_ssa_undef(&b, 1, 32), data, nir_imm_int(&b, 0)); return b.shader; } @@ -96,7 +84,7 @@ create_dcc_compress_compute(struct radv_device *device) .bindingCount = 2, .pBindings = (VkDescriptorSetLayoutBinding[]){ {.binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, .descriptorCount = 1, .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, .pImmutableSamplers = NULL}, @@ -849,7 +837,7 @@ radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_imag .baseArrayLayer = subresourceRange->baseArrayLayer + s, .layerCount = 1}, }, - NULL); + &(struct radv_image_view_extra_create_info){.enable_compression = true}); radv_image_view_init( &store_iview, cmd_buffer->device, &(VkImageViewCreateInfo){ @@ -873,7 +861,7 @@ radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_imag .dstBinding = 0, .dstArrayElement = 0, .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, .pImageInfo = (VkDescriptorImageInfo[]){ { diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 06cb896e1d3..4088af95a5c 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -2175,6 +2175,7 @@ bool radv_android_gralloc_supports_format(VkFormat format, VkImageUsageFlagBits struct radv_image_view_extra_create_info { bool disable_compression; + bool enable_compression; }; void radv_image_view_init(struct radv_image_view *view, struct radv_device *device, |