diff options
author | Marek Olšák <marek.olsak@amd.com> | 2022-04-25 00:04:41 -0400 |
---|---|---|
committer | Marek Olšák <marek.olsak@amd.com> | 2022-05-03 11:11:08 -0400 |
commit | 8c0669fe3f8b28735483e0e4aa17268fb6732097 (patch) | |
tree | b8ea5f641858574f3c354724e72a46281c4752cf | |
parent | 01d994f5e63586004128597a18f0253801d65d04 (diff) |
radeonsi: implement compute_copy_image between 1D_ARRAY and other texture types
And set more optimal compute block sizes.
The compute copy is required to preserve NaNs, so this fixes a lot of
AMD_TEST=copyimage cases.
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16215>
-rw-r--r-- | src/gallium/drivers/radeonsi/si_blit.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_compute_blit.c | 41 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.c | 10 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 5 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shaderlib_nir.c | 29 |
5 files changed, 50 insertions, 39 deletions
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index e559a9fce03..458b98e13b4 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -954,9 +954,7 @@ void si_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst if (si_can_use_compute_blit(sctx, dst->format, dst->nr_samples, true, vi_dcc_enabled(sdst, dst_level)) && si_can_use_compute_blit(sctx, src->format, src->nr_samples, false, - vi_dcc_enabled(ssrc, src_level)) && - !(dst->target != src->target && - (src->target == PIPE_TEXTURE_1D_ARRAY || dst->target == PIPE_TEXTURE_1D_ARRAY))) { + vi_dcc_enabled(ssrc, src_level))) { si_compute_copy_image(sctx, dst, dst_level, src, src_level, dstx, dsty, dstz, src_box, false, SI_OP_SYNC_BEFORE_AFTER); return; diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index e6ac4b5e64c..95a5b9aa38a 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -504,7 +504,6 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u enum pipe_format src_format = util_format_linear(src->format); enum pipe_format dst_format = util_format_linear(dst->format); bool is_linear = ssrc->surface.is_linear || sdst->surface.is_linear; - bool is_1D = dst->target == PIPE_TEXTURE_1D_ARRAY && src->target == PIPE_TEXTURE_1D_ARRAY; assert(util_format_is_subsampled_422(src_format) == util_format_is_subsampled_422(dst_format)); @@ -624,31 +623,37 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u si_launch_grid_internal(sctx, &info, sctx->cs_dcc_decompress, flags | SI_OP_CS_IMAGE); } else { - sctx->cs_user_data[0] = src_box->x | (dstx << 16); - - int block_x = is_1D || is_linear ? 64 : 8; - int block_y = is_1D || is_linear ? 1 : 8; + bool dst_is_1d = dst->target == PIPE_TEXTURE_1D || + dst->target == PIPE_TEXTURE_1D_ARRAY; + bool src_is_1d = src->target == PIPE_TEXTURE_1D || + src->target == PIPE_TEXTURE_1D_ARRAY; + int block_x, block_y; int block_z = 1; - if (is_1D) { - assert(height == 1); /* height is not used for 1D images */ - assert(src_box->y == 0 && dsty == 0); - - sctx->cs_user_data[1] = src_box->z | (dstz << 16); - - /* We pass array index in 'y' for 1D images. */ - height = depth; - depth = 1; + /* Choose the block dimensions based on the copy area size. */ + if (src_box->height <= 4) { + block_y = util_next_power_of_two(src_box->height); + block_x = 64 / block_y; + } else if (src_box->width <= 4) { + block_x = util_next_power_of_two(src_box->width); + block_y = 64 / block_x; + } else if (is_linear) { + block_x = 64; + block_y = 1; } else { - sctx->cs_user_data[1] = src_box->y | (dsty << 16); - sctx->cs_user_data[2] = src_box->z | (dstz << 16); + block_x = 8; + block_y = 8; } + sctx->cs_user_data[0] = src_box->x | (dstx << 16); + sctx->cs_user_data[1] = src_box->y | (dsty << 16); + sctx->cs_user_data[2] = src_box->z | (dstz << 16); + set_work_size(&info, block_x, block_y, block_z, width, height, depth); - void **copy_image_cs_ptr = is_1D ? &sctx->cs_copy_image_1D : &sctx->cs_copy_image_2D; + void **copy_image_cs_ptr = &sctx->cs_copy_image[src_is_1d][dst_is_1d]; if (!*copy_image_cs_ptr) - *copy_image_cs_ptr = si_create_copy_image_cs(sctx, is_1D); + *copy_image_cs_ptr = si_create_copy_image_cs(sctx, src_is_1d, dst_is_1d); assert(*copy_image_cs_ptr); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 20af97e12f2..0d30845366b 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -258,10 +258,12 @@ static void si_destroy_context(struct pipe_context *context) sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_buffer_rmw); if (sctx->cs_copy_buffer) sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_buffer); - if (sctx->cs_copy_image_1D) - sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image_1D); - if (sctx->cs_copy_image_2D) - sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image_2D); + for (unsigned i = 0; i < ARRAY_SIZE(sctx->cs_copy_image); i++) { + for (unsigned j = 0; j < ARRAY_SIZE(sctx->cs_copy_image[i]); j++) { + if (sctx->cs_copy_image[i][j]) + sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image[i][j]); + } + } if (sctx->cs_clear_render_target) sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_render_target); if (sctx->cs_clear_render_target_1d_array) diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index a9a9bad88fa..d28a48bc16a 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -965,8 +965,7 @@ struct si_context { void *cs_clear_buffer; void *cs_clear_buffer_rmw; void *cs_copy_buffer; - void *cs_copy_image_1D; - void *cs_copy_image_2D; + void *cs_copy_image[2][2]; /* [src_is_1d][dst_is_1d] */ void *cs_clear_render_target; void *cs_clear_render_target_1d_array; void *cs_clear_12bytes_buffer; @@ -1519,7 +1518,7 @@ void si_suspend_queries(struct si_context *sctx); void si_resume_queries(struct si_context *sctx); /* si_shaderlib_nir.c */ -void *si_create_copy_image_cs(struct si_context *sctx, bool is_1D); +void *si_create_copy_image_cs(struct si_context *sctx, bool src_is_1d_array, bool dst_is_1d_array); void *si_create_dcc_retile_cs(struct si_context *sctx, struct radeon_surf *surf); void *gfx9_create_clear_dcc_msaa_cs(struct si_context *sctx, struct si_texture *tex); diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c index 33de79b8e99..86b93b63cf0 100644 --- a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c +++ b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c @@ -65,7 +65,7 @@ deref_ssa(nir_builder *b, nir_variable *var) * It expects the source and destination (x,y,z) coords as user_data_amd, * packed into 3 SGPRs as 2x16bits per component. */ -void *si_create_copy_image_cs(struct si_context *sctx, bool is_1D) +void *si_create_copy_image_cs(struct si_context *sctx, bool src_is_1d_array, bool dst_is_1d_array) { const nir_shader_compiler_options *options = sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE); @@ -78,12 +78,8 @@ void *si_create_copy_image_cs(struct si_context *sctx, bool is_1D) */ b.shader->info.workgroup_size_variable = true; - /* 1D uses 'x' as image coord, and 'y' as array index. - * 2D uses 'x'&'y' as image coords, and 'z' as array index. - */ - int n_components = is_1D ? 2 : 3; - b.shader->info.cs.user_data_components_amd = n_components; - nir_ssa_def *ids = get_global_ids(&b, n_components); + b.shader->info.cs.user_data_components_amd = 3; + nir_ssa_def *ids = get_global_ids(&b, 3); nir_ssa_def *coord_src = NULL, *coord_dst = NULL; unpack_2x16(&b, nir_load_user_data_amd(&b), &coord_src, &coord_dst); @@ -91,13 +87,24 @@ void *si_create_copy_image_cs(struct si_context *sctx, bool is_1D) coord_src = nir_iadd(&b, coord_src, ids); coord_dst = nir_iadd(&b, coord_dst, ids); - const struct glsl_type *img_type = glsl_image_type(is_1D ? GLSL_SAMPLER_DIM_1D : GLSL_SAMPLER_DIM_2D, - /*is_array*/ true, GLSL_TYPE_FLOAT); + static unsigned swizzle_xz[] = {0, 2, 0, 0}; + + if (src_is_1d_array) + coord_src = nir_swizzle(&b, coord_src, swizzle_xz, 4); + if (dst_is_1d_array) + coord_dst = nir_swizzle(&b, coord_dst, swizzle_xz, 4); + + const struct glsl_type *src_img_type = glsl_image_type(src_is_1d_array ? GLSL_SAMPLER_DIM_1D + : GLSL_SAMPLER_DIM_2D, + /*is_array*/ true, GLSL_TYPE_FLOAT); + const struct glsl_type *dst_img_type = glsl_image_type(dst_is_1d_array ? GLSL_SAMPLER_DIM_1D + : GLSL_SAMPLER_DIM_2D, + /*is_array*/ true, GLSL_TYPE_FLOAT); - nir_variable *img_src = nir_variable_create(b.shader, nir_var_image, img_type, "img_src"); + nir_variable *img_src = nir_variable_create(b.shader, nir_var_image, src_img_type, "img_src"); img_src->data.binding = 0; - nir_variable *img_dst = nir_variable_create(b.shader, nir_var_image, img_type, "img_dst"); + nir_variable *img_dst = nir_variable_create(b.shader, nir_var_image, dst_img_type, "img_dst"); img_dst->data.binding = 1; nir_ssa_def *undef32 = nir_ssa_undef(&b, 1, 32); |