summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarek Olšák <marek.olsak@amd.com>2024-04-27 07:41:38 -0400
committerMarge Bot <emma+marge@anholt.net>2024-05-15 06:42:33 +0000
commitb771d1355747d9379761dbc709394d792c628b39 (patch)
tree739634f5bf5196d3ea9f4fba6bf3b52f5b0b80ef
parent995e7d927c2613e56a05bff98acc2bf0e02b7782 (diff)
radeonsi: replace the clear_12bytes_buffer shader with the DMA compute shader
It can handle 12-byte clear values with these trivial changes. Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29053>
-rw-r--r--src/gallium/drivers/radeonsi/si_compute_blit.c38
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.h1
-rw-r--r--src/gallium/drivers/radeonsi/si_shaderlib_nir.c19
3 files changed, 5 insertions, 53 deletions
diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c
index 0821562a2ed..e505266cd28 100644
--- a/src/gallium/drivers/radeonsi/si_compute_blit.c
+++ b/src/gallium/drivers/radeonsi/si_compute_blit.c
@@ -285,32 +285,6 @@ void si_compute_clear_buffer_rmw(struct si_context *sctx, struct pipe_resource *
1, &sb, 0x1);
}
-static void si_compute_clear_12bytes_buffer(struct si_context *sctx, struct pipe_resource *dst,
- unsigned dst_offset, unsigned size,
- const uint32_t *clear_value, unsigned flags,
- enum si_coherency coher)
-{
- assert(dst_offset % 4 == 0);
- assert(size % 4 == 0);
- unsigned size_12 = DIV_ROUND_UP(size, 12);
-
- struct pipe_shader_buffer sb = {0};
- sb.buffer = dst;
- sb.buffer_offset = dst_offset;
- sb.buffer_size = size;
-
- memcpy(sctx->cs_user_data, clear_value, 12);
-
- struct pipe_grid_info info = {0};
- set_work_size(&info, 64, 1, 1, size_12, 1, 1);
-
- if (!sctx->cs_clear_12bytes_buffer)
- sctx->cs_clear_12bytes_buffer = si_clear_12bytes_buffer_shader(sctx);
-
- si_launch_grid_internal_ssbos(sctx, &info, sctx->cs_clear_12bytes_buffer, flags, coher,
- 1, &sb, 0x1);
-}
-
static void si_compute_do_clear_or_copy(struct si_context *sctx, struct pipe_resource *dst,
unsigned dst_offset, struct pipe_resource *src,
unsigned src_offset, unsigned size,
@@ -325,7 +299,7 @@ static void si_compute_do_clear_or_copy(struct si_context *sctx, struct pipe_res
assert(!src || src_offset + size <= src->width0);
bool is_copy = src != NULL;
- unsigned dwords_per_thread = 4;
+ unsigned dwords_per_thread = clear_value_size == 12 ? 3 : 4;
unsigned num_threads = DIV_ROUND_UP(size, dwords_per_thread * 4);
struct pipe_grid_info info = {};
@@ -342,13 +316,14 @@ static void si_compute_do_clear_or_copy(struct si_context *sctx, struct pipe_res
sb[0].buffer_size = size;
} else {
assert(clear_value_size >= 4 && clear_value_size <= 16 &&
- util_is_power_of_two_or_zero(clear_value_size));
+ (clear_value_size == 12 || util_is_power_of_two_or_zero(clear_value_size)));
for (unsigned i = 0; i < 4; i++)
sctx->cs_user_data[i] = clear_value[i % (clear_value_size / 4)];
}
- void **shader = is_copy ? &sctx->cs_copy_buffer : &sctx->cs_clear_buffer;
+ void **shader = is_copy ? &sctx->cs_copy_buffer :
+ clear_value_size == 12 ? &sctx->cs_clear_12bytes_buffer : &sctx->cs_clear_buffer;
if (!*shader)
*shader = si_create_dma_compute_shader(sctx, dwords_per_thread, !is_copy);
@@ -377,11 +352,6 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
if (util_lower_clearsize_to_dword(clear_value, (int*)&clear_value_size, &clamped))
clear_value = &clamped;
- if (clear_value_size == 12) {
- si_compute_clear_12bytes_buffer(sctx, dst, offset, size, clear_value, flags, coher);
- return;
- }
-
uint64_t aligned_size = size & ~3ull;
if (aligned_size >= 4) {
uint64_t compute_min_size;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index c1c632e2081..2b12327d237 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1727,7 +1727,6 @@ void *si_create_dma_compute_shader(struct si_context *sctx, unsigned num_dwords_
void *si_create_ubyte_to_ushort_compute_shader(struct si_context *sctx);
void *si_create_clear_buffer_rmw_cs(struct si_context *sctx);
void *si_clear_render_target_shader(struct si_context *sctx, enum pipe_texture_target type);
-void *si_clear_12bytes_buffer_shader(struct si_context *sctx);
void *si_create_fmask_expand_cs(struct si_context *sctx, unsigned num_samples, bool is_array);
void *si_create_query_result_cs(struct si_context *sctx);
void *gfx11_create_sh_query_result_cs(struct si_context *sctx);
diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c
index efc333c9706..8f565de5ff0 100644
--- a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c
@@ -631,23 +631,6 @@ void *si_clear_image_dcc_single_shader(struct si_context *sctx, bool is_msaa, un
return create_shader_state(sctx, b.shader);
}
-void *si_clear_12bytes_buffer_shader(struct si_context *sctx)
-{
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, sctx->screen->nir_options,
- "clear_12bytes_buffer");
- b.shader->info.workgroup_size[0] = 64;
- b.shader->info.workgroup_size[1] = 1;
- b.shader->info.workgroup_size[2] = 1;
- b.shader->info.cs.user_data_components_amd = 3;
-
- nir_def *offset = nir_imul_imm(&b, get_global_ids(&b, 1), 12);
- nir_def *value = nir_trim_vector(&b, nir_load_user_data_amd(&b), 3);
-
- nir_store_ssbo(&b, value, nir_imm_int(&b, 0), offset);
-
- return create_shader_state(sctx, b.shader);
-}
-
void *si_create_ubyte_to_ushort_compute_shader(struct si_context *sctx)
{
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, sctx->screen->nir_options,
@@ -672,7 +655,7 @@ void *si_create_ubyte_to_ushort_compute_shader(struct si_context *sctx)
void *si_create_dma_compute_shader(struct si_context *sctx, unsigned num_dwords_per_thread,
bool is_clear)
{
- assert(util_is_power_of_two_nonzero(num_dwords_per_thread) && num_dwords_per_thread <= 4);
+ assert(num_dwords_per_thread && num_dwords_per_thread <= 4);
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, sctx->screen->nir_options,
"create_dma_compute");