diff options
author | Marek Olšák <marek.olsak@amd.com> | 2024-04-27 07:41:38 -0400 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2024-05-15 06:42:33 +0000 |
commit | b771d1355747d9379761dbc709394d792c628b39 (patch) | |
tree | 739634f5bf5196d3ea9f4fba6bf3b52f5b0b80ef | |
parent | 995e7d927c2613e56a05bff98acc2bf0e02b7782 (diff) |
radeonsi: replace the clear_12bytes_buffer shader with the DMA compute shader
It can handle 12-byte clear values with these trivial changes.
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29053>
-rw-r--r-- | src/gallium/drivers/radeonsi/si_compute_blit.c | 38 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shaderlib_nir.c | 19 |
3 files changed, 5 insertions, 53 deletions
diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index 0821562a2ed..e505266cd28 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -285,32 +285,6 @@ void si_compute_clear_buffer_rmw(struct si_context *sctx, struct pipe_resource * 1, &sb, 0x1); } -static void si_compute_clear_12bytes_buffer(struct si_context *sctx, struct pipe_resource *dst, - unsigned dst_offset, unsigned size, - const uint32_t *clear_value, unsigned flags, - enum si_coherency coher) -{ - assert(dst_offset % 4 == 0); - assert(size % 4 == 0); - unsigned size_12 = DIV_ROUND_UP(size, 12); - - struct pipe_shader_buffer sb = {0}; - sb.buffer = dst; - sb.buffer_offset = dst_offset; - sb.buffer_size = size; - - memcpy(sctx->cs_user_data, clear_value, 12); - - struct pipe_grid_info info = {0}; - set_work_size(&info, 64, 1, 1, size_12, 1, 1); - - if (!sctx->cs_clear_12bytes_buffer) - sctx->cs_clear_12bytes_buffer = si_clear_12bytes_buffer_shader(sctx); - - si_launch_grid_internal_ssbos(sctx, &info, sctx->cs_clear_12bytes_buffer, flags, coher, - 1, &sb, 0x1); -} - static void si_compute_do_clear_or_copy(struct si_context *sctx, struct pipe_resource *dst, unsigned dst_offset, struct pipe_resource *src, unsigned src_offset, unsigned size, @@ -325,7 +299,7 @@ static void si_compute_do_clear_or_copy(struct si_context *sctx, struct pipe_res assert(!src || src_offset + size <= src->width0); bool is_copy = src != NULL; - unsigned dwords_per_thread = 4; + unsigned dwords_per_thread = clear_value_size == 12 ? 3 : 4; unsigned num_threads = DIV_ROUND_UP(size, dwords_per_thread * 4); struct pipe_grid_info info = {}; @@ -342,13 +316,14 @@ static void si_compute_do_clear_or_copy(struct si_context *sctx, struct pipe_res sb[0].buffer_size = size; } else { assert(clear_value_size >= 4 && clear_value_size <= 16 && - util_is_power_of_two_or_zero(clear_value_size)); + (clear_value_size == 12 || util_is_power_of_two_or_zero(clear_value_size))); for (unsigned i = 0; i < 4; i++) sctx->cs_user_data[i] = clear_value[i % (clear_value_size / 4)]; } - void **shader = is_copy ? &sctx->cs_copy_buffer : &sctx->cs_clear_buffer; + void **shader = is_copy ? &sctx->cs_copy_buffer : + clear_value_size == 12 ? &sctx->cs_clear_12bytes_buffer : &sctx->cs_clear_buffer; if (!*shader) *shader = si_create_dma_compute_shader(sctx, dwords_per_thread, !is_copy); @@ -377,11 +352,6 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, if (util_lower_clearsize_to_dword(clear_value, (int*)&clear_value_size, &clamped)) clear_value = &clamped; - if (clear_value_size == 12) { - si_compute_clear_12bytes_buffer(sctx, dst, offset, size, clear_value, flags, coher); - return; - } - uint64_t aligned_size = size & ~3ull; if (aligned_size >= 4) { uint64_t compute_min_size; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index c1c632e2081..2b12327d237 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1727,7 +1727,6 @@ void *si_create_dma_compute_shader(struct si_context *sctx, unsigned num_dwords_ void *si_create_ubyte_to_ushort_compute_shader(struct si_context *sctx); void *si_create_clear_buffer_rmw_cs(struct si_context *sctx); void *si_clear_render_target_shader(struct si_context *sctx, enum pipe_texture_target type); -void *si_clear_12bytes_buffer_shader(struct si_context *sctx); void *si_create_fmask_expand_cs(struct si_context *sctx, unsigned num_samples, bool is_array); void *si_create_query_result_cs(struct si_context *sctx); void *gfx11_create_sh_query_result_cs(struct si_context *sctx); diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c index efc333c9706..8f565de5ff0 100644 --- a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c +++ b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c @@ -631,23 +631,6 @@ void *si_clear_image_dcc_single_shader(struct si_context *sctx, bool is_msaa, un return create_shader_state(sctx, b.shader); } -void *si_clear_12bytes_buffer_shader(struct si_context *sctx) -{ - nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, sctx->screen->nir_options, - "clear_12bytes_buffer"); - b.shader->info.workgroup_size[0] = 64; - b.shader->info.workgroup_size[1] = 1; - b.shader->info.workgroup_size[2] = 1; - b.shader->info.cs.user_data_components_amd = 3; - - nir_def *offset = nir_imul_imm(&b, get_global_ids(&b, 1), 12); - nir_def *value = nir_trim_vector(&b, nir_load_user_data_amd(&b), 3); - - nir_store_ssbo(&b, value, nir_imm_int(&b, 0), offset); - - return create_shader_state(sctx, b.shader); -} - void *si_create_ubyte_to_ushort_compute_shader(struct si_context *sctx) { nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, sctx->screen->nir_options, @@ -672,7 +655,7 @@ void *si_create_ubyte_to_ushort_compute_shader(struct si_context *sctx) void *si_create_dma_compute_shader(struct si_context *sctx, unsigned num_dwords_per_thread, bool is_clear) { - assert(util_is_power_of_two_nonzero(num_dwords_per_thread) && num_dwords_per_thread <= 4); + assert(num_dwords_per_thread && num_dwords_per_thread <= 4); nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, sctx->screen->nir_options, "create_dma_compute"); |