summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarek Olšák <marek.olsak@amd.com>2022-04-25 00:04:41 -0400
committerMarek Olšák <marek.olsak@amd.com>2022-05-03 11:11:08 -0400
commit8c0669fe3f8b28735483e0e4aa17268fb6732097 (patch)
treeb8ea5f641858574f3c354724e72a46281c4752cf
parent01d994f5e63586004128597a18f0253801d65d04 (diff)
radeonsi: implement compute_copy_image between 1D_ARRAY and other texture types
And set more optimal compute block sizes. The compute copy is required to preserve NaNs, so this fixes a lot of AMD_TEST=copyimage cases. Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16215>
-rw-r--r--src/gallium/drivers/radeonsi/si_blit.c4
-rw-r--r--src/gallium/drivers/radeonsi/si_compute_blit.c41
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.c10
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.h5
-rw-r--r--src/gallium/drivers/radeonsi/si_shaderlib_nir.c29
5 files changed, 50 insertions, 39 deletions
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index e559a9fce03..458b98e13b4 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -954,9 +954,7 @@ void si_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst
if (si_can_use_compute_blit(sctx, dst->format, dst->nr_samples, true,
vi_dcc_enabled(sdst, dst_level)) &&
si_can_use_compute_blit(sctx, src->format, src->nr_samples, false,
- vi_dcc_enabled(ssrc, src_level)) &&
- !(dst->target != src->target &&
- (src->target == PIPE_TEXTURE_1D_ARRAY || dst->target == PIPE_TEXTURE_1D_ARRAY))) {
+ vi_dcc_enabled(ssrc, src_level))) {
si_compute_copy_image(sctx, dst, dst_level, src, src_level, dstx, dsty, dstz,
src_box, false, SI_OP_SYNC_BEFORE_AFTER);
return;
diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c
index e6ac4b5e64c..95a5b9aa38a 100644
--- a/src/gallium/drivers/radeonsi/si_compute_blit.c
+++ b/src/gallium/drivers/radeonsi/si_compute_blit.c
@@ -504,7 +504,6 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u
enum pipe_format src_format = util_format_linear(src->format);
enum pipe_format dst_format = util_format_linear(dst->format);
bool is_linear = ssrc->surface.is_linear || sdst->surface.is_linear;
- bool is_1D = dst->target == PIPE_TEXTURE_1D_ARRAY && src->target == PIPE_TEXTURE_1D_ARRAY;
assert(util_format_is_subsampled_422(src_format) == util_format_is_subsampled_422(dst_format));
@@ -624,31 +623,37 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u
si_launch_grid_internal(sctx, &info, sctx->cs_dcc_decompress, flags | SI_OP_CS_IMAGE);
} else {
- sctx->cs_user_data[0] = src_box->x | (dstx << 16);
-
- int block_x = is_1D || is_linear ? 64 : 8;
- int block_y = is_1D || is_linear ? 1 : 8;
+ bool dst_is_1d = dst->target == PIPE_TEXTURE_1D ||
+ dst->target == PIPE_TEXTURE_1D_ARRAY;
+ bool src_is_1d = src->target == PIPE_TEXTURE_1D ||
+ src->target == PIPE_TEXTURE_1D_ARRAY;
+ int block_x, block_y;
int block_z = 1;
- if (is_1D) {
- assert(height == 1); /* height is not used for 1D images */
- assert(src_box->y == 0 && dsty == 0);
-
- sctx->cs_user_data[1] = src_box->z | (dstz << 16);
-
- /* We pass array index in 'y' for 1D images. */
- height = depth;
- depth = 1;
+ /* Choose the block dimensions based on the copy area size. */
+ if (src_box->height <= 4) {
+ block_y = util_next_power_of_two(src_box->height);
+ block_x = 64 / block_y;
+ } else if (src_box->width <= 4) {
+ block_x = util_next_power_of_two(src_box->width);
+ block_y = 64 / block_x;
+ } else if (is_linear) {
+ block_x = 64;
+ block_y = 1;
} else {
- sctx->cs_user_data[1] = src_box->y | (dsty << 16);
- sctx->cs_user_data[2] = src_box->z | (dstz << 16);
+ block_x = 8;
+ block_y = 8;
}
+ sctx->cs_user_data[0] = src_box->x | (dstx << 16);
+ sctx->cs_user_data[1] = src_box->y | (dsty << 16);
+ sctx->cs_user_data[2] = src_box->z | (dstz << 16);
+
set_work_size(&info, block_x, block_y, block_z, width, height, depth);
- void **copy_image_cs_ptr = is_1D ? &sctx->cs_copy_image_1D : &sctx->cs_copy_image_2D;
+ void **copy_image_cs_ptr = &sctx->cs_copy_image[src_is_1d][dst_is_1d];
if (!*copy_image_cs_ptr)
- *copy_image_cs_ptr = si_create_copy_image_cs(sctx, is_1D);
+ *copy_image_cs_ptr = si_create_copy_image_cs(sctx, src_is_1d, dst_is_1d);
assert(*copy_image_cs_ptr);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 20af97e12f2..0d30845366b 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -258,10 +258,12 @@ static void si_destroy_context(struct pipe_context *context)
sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_buffer_rmw);
if (sctx->cs_copy_buffer)
sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_buffer);
- if (sctx->cs_copy_image_1D)
- sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image_1D);
- if (sctx->cs_copy_image_2D)
- sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image_2D);
+ for (unsigned i = 0; i < ARRAY_SIZE(sctx->cs_copy_image); i++) {
+ for (unsigned j = 0; j < ARRAY_SIZE(sctx->cs_copy_image[i]); j++) {
+ if (sctx->cs_copy_image[i][j])
+ sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image[i][j]);
+ }
+ }
if (sctx->cs_clear_render_target)
sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_render_target);
if (sctx->cs_clear_render_target_1d_array)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index a9a9bad88fa..d28a48bc16a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -965,8 +965,7 @@ struct si_context {
void *cs_clear_buffer;
void *cs_clear_buffer_rmw;
void *cs_copy_buffer;
- void *cs_copy_image_1D;
- void *cs_copy_image_2D;
+ void *cs_copy_image[2][2]; /* [src_is_1d][dst_is_1d] */
void *cs_clear_render_target;
void *cs_clear_render_target_1d_array;
void *cs_clear_12bytes_buffer;
@@ -1519,7 +1518,7 @@ void si_suspend_queries(struct si_context *sctx);
void si_resume_queries(struct si_context *sctx);
/* si_shaderlib_nir.c */
-void *si_create_copy_image_cs(struct si_context *sctx, bool is_1D);
+void *si_create_copy_image_cs(struct si_context *sctx, bool src_is_1d_array, bool dst_is_1d_array);
void *si_create_dcc_retile_cs(struct si_context *sctx, struct radeon_surf *surf);
void *gfx9_create_clear_dcc_msaa_cs(struct si_context *sctx, struct si_texture *tex);
diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c
index 33de79b8e99..86b93b63cf0 100644
--- a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c
@@ -65,7 +65,7 @@ deref_ssa(nir_builder *b, nir_variable *var)
* It expects the source and destination (x,y,z) coords as user_data_amd,
* packed into 3 SGPRs as 2x16bits per component.
*/
-void *si_create_copy_image_cs(struct si_context *sctx, bool is_1D)
+void *si_create_copy_image_cs(struct si_context *sctx, bool src_is_1d_array, bool dst_is_1d_array)
{
const nir_shader_compiler_options *options =
sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE);
@@ -78,12 +78,8 @@ void *si_create_copy_image_cs(struct si_context *sctx, bool is_1D)
*/
b.shader->info.workgroup_size_variable = true;
- /* 1D uses 'x' as image coord, and 'y' as array index.
- * 2D uses 'x'&'y' as image coords, and 'z' as array index.
- */
- int n_components = is_1D ? 2 : 3;
- b.shader->info.cs.user_data_components_amd = n_components;
- nir_ssa_def *ids = get_global_ids(&b, n_components);
+ b.shader->info.cs.user_data_components_amd = 3;
+ nir_ssa_def *ids = get_global_ids(&b, 3);
nir_ssa_def *coord_src = NULL, *coord_dst = NULL;
unpack_2x16(&b, nir_load_user_data_amd(&b), &coord_src, &coord_dst);
@@ -91,13 +87,24 @@ void *si_create_copy_image_cs(struct si_context *sctx, bool is_1D)
coord_src = nir_iadd(&b, coord_src, ids);
coord_dst = nir_iadd(&b, coord_dst, ids);
- const struct glsl_type *img_type = glsl_image_type(is_1D ? GLSL_SAMPLER_DIM_1D : GLSL_SAMPLER_DIM_2D,
- /*is_array*/ true, GLSL_TYPE_FLOAT);
+ static unsigned swizzle_xz[] = {0, 2, 0, 0};
+
+ if (src_is_1d_array)
+ coord_src = nir_swizzle(&b, coord_src, swizzle_xz, 4);
+ if (dst_is_1d_array)
+ coord_dst = nir_swizzle(&b, coord_dst, swizzle_xz, 4);
+
+ const struct glsl_type *src_img_type = glsl_image_type(src_is_1d_array ? GLSL_SAMPLER_DIM_1D
+ : GLSL_SAMPLER_DIM_2D,
+ /*is_array*/ true, GLSL_TYPE_FLOAT);
+ const struct glsl_type *dst_img_type = glsl_image_type(dst_is_1d_array ? GLSL_SAMPLER_DIM_1D
+ : GLSL_SAMPLER_DIM_2D,
+ /*is_array*/ true, GLSL_TYPE_FLOAT);
- nir_variable *img_src = nir_variable_create(b.shader, nir_var_image, img_type, "img_src");
+ nir_variable *img_src = nir_variable_create(b.shader, nir_var_image, src_img_type, "img_src");
img_src->data.binding = 0;
- nir_variable *img_dst = nir_variable_create(b.shader, nir_var_image, img_type, "img_dst");
+ nir_variable *img_dst = nir_variable_create(b.shader, nir_var_image, dst_img_type, "img_dst");
img_dst->data.binding = 1;
nir_ssa_def *undef32 = nir_ssa_undef(&b, 1, 32);