mesa/st: Add st_compute_transcode_astc_to_dxt5

Add a function to upload ASTC data, transcoding it to BC3/DXT5 in the process. Reviewed-by: Tapani Pälli <tapani.palli@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19827>
author: Nanley Chery <nanley.g.chery@intel.com> 2022-12-12 05:12:46 -0800
committer: Marge Bot <emma+marge@anholt.net> 2023-02-10 01:40:47 +0000
commit: 637ff224132d3c839f27a76d8a3a10984892cb28 (patch)
tree: d00b4d7b7c8536df6fa0071fadb69df461f97e1e /src/mesa/state_tracker
parent: 1f8f1a0d9bd33700da52022ab8f93c55148dd2e5 (diff)
2 files changed, 437 insertions, 0 deletions
diff --git a/src/mesa/state_tracker/st_texcompress_compute.c b/src/mesa/state_tracker/st_texcompress_compute.c
index d792fde8f7c..3c6adc2a685 100644
--- a/src/mesa/state_tracker/st_texcompress_compute.c
+++ b/src/mesa/state_tracker/st_texcompress_compute.c
@@ -23,16 +23,30 @@
  *
  **************************************************************************/
 
+#include "compiler/glsl/bc1_glsl.h"
+#include "compiler/glsl/bc4_glsl.h"
+#include "compiler/glsl/cross_platform_settings_piece_all.h"
+#include "compiler/glsl/etc2_rgba_stitch_glsl.h"
+
+#include "main/context.h"
 #include "main/shaderapi.h"
 #include "main/shaderobj.h"
+#include "main/texcompress_astc.h"
+#include "main/uniforms.h"
 
+#include "state_tracker/st_atom_constbuf.h"
 #include "state_tracker/st_bc1_tables.h"
 #include "state_tracker/st_context.h"
+#include "state_tracker/st_program.h"
 #include "state_tracker/st_texcompress_compute.h"
+#include "state_tracker/st_texture.h"
 
 #include "util/u_string.h"
 
 enum compute_program_id {
+   COMPUTE_PROGRAM_BC1,
+   COMPUTE_PROGRAM_BC4,
+   COMPUTE_PROGRAM_STITCH,
    COMPUTE_PROGRAM_COUNT
 };
 
@@ -110,6 +124,366 @@ create_bc1_endpoint_ssbo(struct pipe_context *pipe)
    return buffer;
 }
 
+static void
+bind_compute_state(struct st_context *st,
+                   struct gl_program *prog,
+                   struct pipe_sampler_view **sampler_views,
+                   const struct pipe_shader_buffer *shader_buffers,
+                   const struct pipe_image_view *image_views,
+                   bool cs_handle_from_prog,
+                   bool constbuf0_from_prog)
+{
+   assert(prog->info.stage == PIPE_SHADER_COMPUTE);
+
+   /* Set compute states in the same order as defined in st_atom_list.h */
+
+   assert(prog->affected_states & ST_NEW_CS_STATE);
+   assert(st->shader_has_one_variant[PIPE_SHADER_COMPUTE]);
+   cso_set_compute_shader_handle(st->cso_context,
+                                 cs_handle_from_prog ?
+                                 prog->variants->driver_shader : NULL);
+
+   if (prog->affected_states & ST_NEW_CS_SAMPLER_VIEWS) {
+      st->pipe->set_sampler_views(st->pipe, prog->info.stage, 0,
+                                  prog->info.num_textures, 0, false,
+                                  sampler_views);
+   }
+
+   if (prog->affected_states & ST_NEW_CS_SAMPLERS) {
+      /* Programs seem to set this bit more often than needed. For example, if
+       * a program only uses texelFetch, this shouldn't be needed. Section
+       * "11.1.3.2 Texel Fetches", of the GL 4.6 spec says:
+       *
+       *    Texel fetch proceeds similarly to the steps described for texture
+       *    access in section 11.1.3.5, with the exception that none of the
+       *    operations controlled by sampler object state are performed,
+       *
+       * We assume that the program is using texelFetch or doesn't care about
+       * this state for a similar reason.
+       *
+       * See https://gitlab.freedesktop.org/mesa/mesa/-/issues/8014.
+       */
+   }
+
+   if (prog->affected_states & ST_NEW_CS_CONSTANTS) {
+      st_upload_constants(st, constbuf0_from_prog ? prog : NULL,
+                          prog->info.stage);
+   }
+
+   if (prog->affected_states & ST_NEW_CS_UBOS) {
+      unreachable("Uniform buffer objects not handled");
+   }
+
+   if (prog->affected_states & ST_NEW_CS_ATOMICS) {
+      unreachable("Atomic buffer objects not handled");
+   }
+
+   if (prog->affected_states & ST_NEW_CS_SSBOS) {
+      st->pipe->set_shader_buffers(st->pipe, prog->info.stage, 0,
+                                   prog->info.num_ssbos, shader_buffers,
+                                   prog->sh.ShaderStorageBlocksWriteAccess);
+   }
+
+   if (prog->affected_states & ST_NEW_CS_IMAGES) {
+      st->pipe->set_shader_images(st->pipe, prog->info.stage, 0,
+                                  prog->info.num_images, 0, image_views);
+   }
+}
+
+static void
+dispatch_compute_state(struct st_context *st,
+                       struct gl_program *prog,
+                       struct pipe_sampler_view **sampler_views,
+                       const struct pipe_shader_buffer *shader_buffers,
+                       const struct pipe_image_view *image_views,
+                       unsigned num_workgroups_x,
+                       unsigned num_workgroups_y,
+                       unsigned num_workgroups_z)
+{
+   assert(prog->info.stage == PIPE_SHADER_COMPUTE);
+
+   /* Bind the state */
+   bind_compute_state(st, prog, sampler_views, shader_buffers, image_views,
+                      true, true);
+
+   /* Launch the grid */
+   const struct pipe_grid_info info = {
+      .block[0] = prog->info.workgroup_size[0],
+      .block[1] = prog->info.workgroup_size[1],
+      .block[2] = prog->info.workgroup_size[2],
+      .grid[0] = num_workgroups_x,
+      .grid[1] = num_workgroups_y,
+      .grid[2] = num_workgroups_z,
+   };
+
+   st->pipe->launch_grid(st->pipe, &info);
+
+   /* Unbind the state */
+   bind_compute_state(st, prog, NULL, NULL, NULL, false, false);
+
+   /* If the previously used compute program was relying on any state that was
+    * trampled on by these state changes, dirty the relevant flags.
+    */
+   if (st->cp) {
+      st->ctx->NewDriverState |=
+         st->cp->affected_states & prog->affected_states;
+   }
+}
+
+static struct pipe_resource *
+cs_encode_bc1(struct st_context *st,
+              struct pipe_resource *rgba8_tex)
+{
+   /* Create the required compute state */
+   struct gl_program *prog =
+      get_compute_program(st, COMPUTE_PROGRAM_BC1, bc1_source,
+                          cross_platform_settings_piece_all_header);
+   if (!prog)
+      return NULL;
+
+   /* ... complete the program setup by defining the number of refinements to
+    * do on the created blocks. The program will attempt to create a more
+    * accurate encoding on each iteration. Doing at least one refinement
+    * provides a significant improvement in quality and is needed to give a
+    * result comparable to the CPU encoder (according to piglit tests).
+    * Additional refinements don't help as much.
+    */
+   const unsigned num_refinements = 1;
+   _mesa_uniform(0, 1, &num_refinements, st->ctx, prog->shader_program,
+                 GLSL_TYPE_UINT, 1);
+
+   const struct pipe_sampler_view templ = {
+      .target = PIPE_TEXTURE_2D,
+      .format = PIPE_FORMAT_R8G8B8A8_UNORM,
+      .swizzle_r = PIPE_SWIZZLE_X,
+      .swizzle_g = PIPE_SWIZZLE_Y,
+      .swizzle_b = PIPE_SWIZZLE_Z,
+      .swizzle_a = PIPE_SWIZZLE_W,
+   };
+   struct pipe_sampler_view *rgba8_view =
+      st->pipe->create_sampler_view(st->pipe, rgba8_tex, &templ);
+   if (!rgba8_view)
+      return NULL;
+
+   const struct pipe_shader_buffer ssbo = {
+      .buffer = st->texcompress_compute.bc1_endpoint_buf,
+      .buffer_size = st->texcompress_compute.bc1_endpoint_buf->width0,
+   };
+
+   struct pipe_resource *bc1_tex =
+      st_texture_create(st, PIPE_TEXTURE_2D, PIPE_FORMAT_R32G32_UINT, 0,
+                        DIV_ROUND_UP(rgba8_tex->width0, 4),
+                        DIV_ROUND_UP(rgba8_tex->height0, 4), 1, 1, 0,
+                        PIPE_BIND_SHADER_IMAGE |
+                        PIPE_BIND_SAMPLER_VIEW, false);
+   if (!bc1_tex)
+      goto release_sampler_views;
+
+   const struct pipe_image_view image = {
+      .resource = bc1_tex,
+      .format = PIPE_FORMAT_R16G16B16A16_UINT,
+      .access = PIPE_IMAGE_ACCESS_WRITE,
+      .shader_access = PIPE_IMAGE_ACCESS_WRITE,
+   };
+
+   /* Dispatch the compute state */
+   dispatch_compute_state(st, prog, &rgba8_view, &ssbo, &image,
+                          DIV_ROUND_UP(rgba8_tex->width0, 32),
+                          DIV_ROUND_UP(rgba8_tex->height0, 32), 1);
+
+release_sampler_views:
+   pipe_sampler_view_reference(&rgba8_view, NULL);
+
+   return bc1_tex;
+}
+
+static struct pipe_resource *
+cs_encode_bc4(struct st_context *st,
+              struct pipe_resource *rgba8_tex,
+              enum pipe_swizzle component, bool use_snorm)
+{
+   /* Create the required compute state */
+   struct gl_program *prog =
+      get_compute_program(st, COMPUTE_PROGRAM_BC4, bc4_source,
+                          cross_platform_settings_piece_all_header);
+   if (!prog)
+      return NULL;
+
+   /* ... complete the program setup by picking the channel to encode and
+    * whether to encode it as snorm. The shader doesn't actually support
+    * channel index 2. So, pick index 0 and rely on swizzling instead.
+    */
+   const unsigned params[] = { 0, use_snorm };
+   _mesa_uniform(0, 1, params, st->ctx, prog->shader_program,
+                 GLSL_TYPE_UINT, 2);
+
+   const struct pipe_sampler_view templ = {
+      .target = PIPE_TEXTURE_2D,
+      .format = PIPE_FORMAT_R8G8B8A8_UNORM,
+      .swizzle_r = component,
+      .swizzle_g = PIPE_SWIZZLE_0,
+      .swizzle_b = PIPE_SWIZZLE_0,
+      .swizzle_a = PIPE_SWIZZLE_1,
+   };
+   struct pipe_sampler_view *rgba8_view =
+      st->pipe->create_sampler_view(st->pipe, rgba8_tex, &templ);
+   if (!rgba8_view)
+      return NULL;
+
+   struct pipe_resource *bc4_tex =
+      st_texture_create(st, PIPE_TEXTURE_2D, PIPE_FORMAT_R32G32_UINT, 0,
+                        DIV_ROUND_UP(rgba8_tex->width0, 4),
+                        DIV_ROUND_UP(rgba8_tex->height0, 4), 1, 1, 0,
+                        PIPE_BIND_SHADER_IMAGE |
+                        PIPE_BIND_SAMPLER_VIEW, false);
+   if (!bc4_tex)
+      goto release_sampler_views;
+
+   const struct pipe_image_view image = {
+      .resource = bc4_tex,
+      .format = PIPE_FORMAT_R16G16B16A16_UINT,
+      .access = PIPE_IMAGE_ACCESS_WRITE,
+      .shader_access = PIPE_IMAGE_ACCESS_WRITE,
+   };
+
+   /* Dispatch the compute state */
+   dispatch_compute_state(st, prog, &rgba8_view, NULL, &image, 1,
+                          DIV_ROUND_UP(rgba8_tex->width0, 16),
+                          DIV_ROUND_UP(rgba8_tex->height0, 16));
+
+release_sampler_views:
+   pipe_sampler_view_reference(&rgba8_view, NULL);
+
+   return bc4_tex;
+}
+
+static struct pipe_resource *
+cs_stitch_64bpb_textures(struct st_context *st,
+                         struct pipe_resource *tex_hi,
+                         struct pipe_resource *tex_lo)
+{
+   assert(util_format_get_blocksizebits(tex_hi->format) == 64);
+   assert(util_format_get_blocksizebits(tex_lo->format) == 64);
+   assert(tex_hi->width0 == tex_lo->width0);
+   assert(tex_hi->height0 == tex_lo->height0);
+
+   struct pipe_resource *stitched_tex = NULL;
+
+   /* Create the required compute state */
+   struct gl_program *prog =
+      get_compute_program(st, COMPUTE_PROGRAM_STITCH, etc2_rgba_stitch_source,
+                          cross_platform_settings_piece_all_header);
+   if (!prog)
+      return NULL;
+
+   const struct pipe_sampler_view templ = {
+      .target = PIPE_TEXTURE_2D,
+      .format = PIPE_FORMAT_R32G32_UINT,
+      .swizzle_r = PIPE_SWIZZLE_X,
+      .swizzle_g = PIPE_SWIZZLE_Y,
+      .swizzle_b = PIPE_SWIZZLE_0,
+      .swizzle_a = PIPE_SWIZZLE_1,
+   };
+   struct pipe_sampler_view *rg32_views[2] = {
+      [0] = st->pipe->create_sampler_view(st->pipe, tex_hi, &templ),
+      [1] = st->pipe->create_sampler_view(st->pipe, tex_lo, &templ),
+   };
+   if (!rg32_views[0] || !rg32_views[1])
+      goto release_sampler_views;
+
+   stitched_tex =
+      st_texture_create(st, PIPE_TEXTURE_2D, PIPE_FORMAT_R32G32B32A32_UINT, 0,
+                        tex_hi->width0,
+                        tex_hi->height0, 1, 1, 0,
+                        PIPE_BIND_SHADER_IMAGE |
+                        PIPE_BIND_SAMPLER_VIEW, false);
+   if (!stitched_tex)
+      goto release_sampler_views;
+
+   const struct pipe_image_view image = {
+      .resource = stitched_tex,
+      .format = PIPE_FORMAT_R32G32B32A32_UINT,
+      .access = PIPE_IMAGE_ACCESS_WRITE,
+      .shader_access = PIPE_IMAGE_ACCESS_WRITE,
+   };
+
+   /* Dispatch the compute state */
+   dispatch_compute_state(st, prog, rg32_views, NULL, &image,
+                          DIV_ROUND_UP(tex_hi->width0, 8),
+                          DIV_ROUND_UP(tex_hi->height0, 8), 1);
+
+release_sampler_views:
+   pipe_sampler_view_reference(&rg32_views[0], NULL);
+   pipe_sampler_view_reference(&rg32_views[1], NULL);
+
+   return stitched_tex;
+}
+
+static struct pipe_resource *
+cs_encode_bc3(struct st_context *st,
+              struct pipe_resource *rgba8_tex)
+{
+   struct pipe_resource *bc3_tex = NULL;
+
+   /* Encode RGB channels as BC1. */
+   struct pipe_resource *bc1_tex = cs_encode_bc1(st, rgba8_tex);
+   if (!bc1_tex)
+      return NULL;
+
+   /* Encode alpha channels as BC4. */
+   struct pipe_resource *bc4_tex =
+      cs_encode_bc4(st, rgba8_tex, PIPE_SWIZZLE_W, false);
+   if (!bc4_tex)
+      goto release_textures;
+
+   st->pipe->memory_barrier(st->pipe, PIPE_BARRIER_TEXTURE);
+
+   /* Combine BC1 and BC4 to create BC3. */
+   bc3_tex = cs_stitch_64bpb_textures(st, bc1_tex, bc4_tex);
+   if (!bc3_tex)
+      goto release_textures;
+
+release_textures:
+   pipe_resource_reference(&bc1_tex, NULL);
+   pipe_resource_reference(&bc4_tex, NULL);
+
+   return bc3_tex;
+}
+
+static struct pipe_resource *
+sw_decode_astc(struct st_context *st,
+               uint8_t *astc_data,
+               unsigned astc_stride,
+               mesa_format astc_format,
+               unsigned width_px, unsigned height_px)
+{
+   /* Create the destination */
+   struct pipe_resource *rgba8_tex =
+      st_texture_create(st, PIPE_TEXTURE_2D, PIPE_FORMAT_R8G8B8A8_UNORM, 0,
+                        width_px, height_px, 1, 1, 0,
+                        PIPE_BIND_SAMPLER_VIEW, false);
+   if (!rgba8_tex)
+      return NULL;
+
+   /* Temporarily map the destination and decode into the returned pointer */
+   struct pipe_transfer *rgba8_xfer;
+   void *rgba8_map = pipe_texture_map(st->pipe, rgba8_tex, 0, 0,
+                                      PIPE_MAP_WRITE, 0, 0,
+                                      width_px, height_px, &rgba8_xfer);
+   if (!rgba8_map) {
+      pipe_resource_reference(&rgba8_tex, NULL);
+      return NULL;
+   }
+
+   _mesa_unpack_astc_2d_ldr(rgba8_map, rgba8_xfer->stride,
+                            astc_data, astc_stride,
+                            width_px, height_px, astc_format);
+
+   pipe_texture_unmap(st->pipe, rgba8_xfer);
+
+   return rgba8_tex;
+}
+
 bool
 st_init_texcompress_compute(struct st_context *st)
 {
@@ -138,3 +512,50 @@ st_destroy_texcompress_compute(struct st_context *st)
    /* Destroy the SSBO used by the BC1 shader program. */
    pipe_resource_reference(&st->texcompress_compute.bc1_endpoint_buf, NULL);
 }
+
+/* See st_texcompress_compute.h for more information. */
+bool
+st_compute_transcode_astc_to_dxt5(struct st_context *st,
+                                  uint8_t *astc_data,
+                                  unsigned astc_stride,
+                                  mesa_format astc_format,
+                                  struct pipe_resource *dxt5_tex,
+                                  unsigned dxt5_level,
+                                  unsigned dxt5_layer)
+{
+   assert(_mesa_has_compute_shaders(st->ctx));
+   assert(_mesa_is_format_astc_2d(astc_format));
+   assert(dxt5_tex->format == PIPE_FORMAT_DXT5_RGBA ||
+          dxt5_tex->format == PIPE_FORMAT_DXT5_SRGBA);
+   assert(dxt5_level <= dxt5_tex->last_level);
+   assert(dxt5_layer <= util_max_layer(dxt5_tex, dxt5_level));
+
+   bool success = false;
+
+   /* Decode ASTC to RGBA8. */
+   struct pipe_resource *rgba8_tex =
+      sw_decode_astc(st, astc_data, astc_stride, astc_format,
+                     u_minify(dxt5_tex->width0, dxt5_level),
+                     u_minify(dxt5_tex->height0, dxt5_level));
+   if (!rgba8_tex)
+      return false;
+
+   /* Encode RGBA8 to BC3. */
+   struct pipe_resource *bc3_tex = cs_encode_bc3(st, rgba8_tex);
+   if (!bc3_tex)
+      goto release_textures;
+
+   /* Upload the result. */
+   struct pipe_box src_box;
+   u_box_origin_2d(bc3_tex->width0, bc3_tex->height0, &src_box);
+   st->pipe->resource_copy_region(st->pipe, dxt5_tex, dxt5_level,
+                                  0, 0, dxt5_layer, bc3_tex, 0, &src_box);
+
+   success = true;
+
+release_textures:
+   pipe_resource_reference(&rgba8_tex, NULL);
+   pipe_resource_reference(&bc3_tex, NULL);
+
+   return success;
+}
diff --git a/src/mesa/state_tracker/st_texcompress_compute.h b/src/mesa/state_tracker/st_texcompress_compute.h
index f438d5f3563..fb2f08fdae7 100644
--- a/src/mesa/state_tracker/st_texcompress_compute.h
+++ b/src/mesa/state_tracker/st_texcompress_compute.h
@@ -32,4 +32,20 @@ st_init_texcompress_compute(struct st_context *st);
 void
 st_destroy_texcompress_compute(struct st_context *st);
 
+/**
+ * When this function returns true, the destination image will contain the
+ * contents of astc_data but transcoded to DXT5/BC3.
+ *
+ * Note that this function will internally create compute programs by using
+ * glCreateShaderProgramv with the application's GL context.
+ */
+bool
+st_compute_transcode_astc_to_dxt5(struct st_context *st,
+                                  uint8_t *astc_data,
+                                  unsigned astc_stride,
+                                  mesa_format astc_format,
+                                  struct pipe_resource *dxt5_tex,
+                                  unsigned dxt5_level,
+                                  unsigned dxt5_layer);
+
 #endif /* ST_TEXCOMPRESS_COMPUTE_H */
author	Nanley Chery <nanley.g.chery@intel.com>	2022-12-12 05:12:46 -0800
committer	Marge Bot <emma+marge@anholt.net>	2023-02-10 01:40:47 +0000
commit	637ff224132d3c839f27a76d8a3a10984892cb28 (patch)
tree	d00b4d7b7c8536df6fa0071fadb69df461f97e1e /src/mesa/state_tracker
parent	1f8f1a0d9bd33700da52022ab8f93c55148dd2e5 (diff)