diff options
Diffstat (limited to 'src/broadcom/vulkan/v3dvx_meta_copy.c')
-rw-r--r-- | src/broadcom/vulkan/v3dvx_meta_copy.c | 1353 |
1 files changed, 1353 insertions, 0 deletions
diff --git a/src/broadcom/vulkan/v3dvx_meta_copy.c b/src/broadcom/vulkan/v3dvx_meta_copy.c new file mode 100644 index 00000000000..d1f629ff140 --- /dev/null +++ b/src/broadcom/vulkan/v3dvx_meta_copy.c @@ -0,0 +1,1353 @@ +/* + * Copyright © 2021 Raspberry Pi + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "v3dv_private.h" +#include "v3dv_meta_copy.h" +#include "broadcom/common/v3d_macros.h" +#include "broadcom/cle/v3dx_pack.h" +#include "broadcom/compiler/v3d_compiler.h" + +#include "vk_format_info.h" + +struct rcl_clear_info { + const union v3dv_clear_value *clear_value; + struct v3dv_image *image; + VkImageAspectFlags aspects; + uint32_t layer; + uint32_t level; +}; + +static struct v3dv_cl * +emit_rcl_prologue(struct v3dv_job *job, + struct framebuffer_data *fb, + const struct rcl_clear_info *clear_info) +{ + const struct v3dv_frame_tiling *tiling = &job->frame_tiling; + + struct v3dv_cl *rcl = &job->rcl; + v3dv_cl_ensure_space_with_branch(rcl, 200 + + tiling->layers * 256 * + cl_packet_length(SUPERTILE_COORDINATES)); + if (job->cmd_buffer->state.oom) + return NULL; + + cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) { + config.early_z_disable = true; + config.image_width_pixels = tiling->width; + config.image_height_pixels = tiling->height; + config.number_of_render_targets = 1; + config.multisample_mode_4x = tiling->msaa; + config.maximum_bpp_of_all_render_targets = tiling->internal_bpp; + config.internal_depth_type = fb->internal_depth_type; + } + + if (clear_info && (clear_info->aspects & VK_IMAGE_ASPECT_COLOR_BIT)) { + uint32_t clear_pad = 0; + if (clear_info->image) { + const struct v3dv_image *image = clear_info->image; + const struct v3d_resource_slice *slice = + &image->slices[clear_info->level]; + if (slice->tiling == V3D_TILING_UIF_NO_XOR || + slice->tiling == V3D_TILING_UIF_XOR) { + int uif_block_height = v3d_utile_height(image->cpp) * 2; + + uint32_t implicit_padded_height = + align(tiling->height, uif_block_height) / uif_block_height; + + if (slice->padded_height_of_output_image_in_uif_blocks - + implicit_padded_height >= 15) { + clear_pad = slice->padded_height_of_output_image_in_uif_blocks; + } + } + } + + const uint32_t *color = &clear_info->clear_value->color[0]; + cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) { + clear.clear_color_low_32_bits = color[0]; + clear.clear_color_next_24_bits = color[1] & 0x00ffffff; + clear.render_target_number = 0; + }; + + if (tiling->internal_bpp >= V3D_INTERNAL_BPP_64) { + cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) { + clear.clear_color_mid_low_32_bits = + ((color[1] >> 24) | (color[2] << 8)); + clear.clear_color_mid_high_24_bits = + ((color[2] >> 24) | ((color[3] & 0xffff) << 8)); + clear.render_target_number = 0; + }; + } + + if (tiling->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) { + cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) { + clear.uif_padded_height_in_uif_blocks = clear_pad; + clear.clear_color_high_16_bits = color[3] >> 16; + clear.render_target_number = 0; + }; + } + } + + cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) { + rt.render_target_0_internal_bpp = tiling->internal_bpp; + rt.render_target_0_internal_type = fb->internal_type; + rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE; + } + + cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) { + clear.z_clear_value = clear_info ? clear_info->clear_value->z : 1.0f; + clear.stencil_clear_value = clear_info ? clear_info->clear_value->s : 0; + }; + + cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) { + init.use_auto_chained_tile_lists = true; + init.size_of_first_block_in_chained_tile_lists = + TILE_ALLOCATION_BLOCK_SIZE_64B; + } + + return rcl; +} + +static void +emit_frame_setup(struct v3dv_job *job, + uint32_t layer, + const union v3dv_clear_value *clear_value) +{ + v3dv_return_if_oom(NULL, job); + + const struct v3dv_frame_tiling *tiling = &job->frame_tiling; + + struct v3dv_cl *rcl = &job->rcl; + + const uint32_t tile_alloc_offset = + 64 * layer * tiling->draw_tiles_x * tiling->draw_tiles_y; + cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) { + list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset); + } + + cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) { + config.number_of_bin_tile_lists = 1; + config.total_frame_width_in_tiles = tiling->draw_tiles_x; + config.total_frame_height_in_tiles = tiling->draw_tiles_y; + + config.supertile_width_in_tiles = tiling->supertile_width; + config.supertile_height_in_tiles = tiling->supertile_height; + + config.total_frame_width_in_supertiles = + tiling->frame_width_in_supertiles; + config.total_frame_height_in_supertiles = + tiling->frame_height_in_supertiles; + } + + /* Implement GFXH-1742 workaround. Also, if we are clearing we have to do + * it here. + */ + for (int i = 0; i < 2; i++) { + cl_emit(rcl, TILE_COORDINATES, coords); + cl_emit(rcl, END_OF_LOADS, end); + cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) { + store.buffer_to_store = NONE; + } + if (clear_value && i == 0) { + cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) { + clear.clear_z_stencil_buffer = true; + clear.clear_all_render_targets = true; + } + } + cl_emit(rcl, END_OF_TILE_MARKER, end); + } + + cl_emit(rcl, FLUSH_VCD_CACHE, flush); +} + +static void +emit_supertile_coordinates(struct v3dv_job *job, + struct framebuffer_data *framebuffer) +{ + v3dv_return_if_oom(NULL, job); + + struct v3dv_cl *rcl = &job->rcl; + + const uint32_t min_y = framebuffer->min_y_supertile; + const uint32_t max_y = framebuffer->max_y_supertile; + const uint32_t min_x = framebuffer->min_x_supertile; + const uint32_t max_x = framebuffer->max_x_supertile; + + for (int y = min_y; y <= max_y; y++) { + for (int x = min_x; x <= max_x; x++) { + cl_emit(rcl, SUPERTILE_COORDINATES, coords) { + coords.column_number_in_supertiles = x; + coords.row_number_in_supertiles = y; + } + } + } +} + +static void +emit_linear_load(struct v3dv_cl *cl, + uint32_t buffer, + struct v3dv_bo *bo, + uint32_t offset, + uint32_t stride, + uint32_t format) +{ + cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) { + load.buffer_to_load = buffer; + load.address = v3dv_cl_address(bo, offset); + load.input_image_format = format; + load.memory_format = V3D_TILING_RASTER; + load.height_in_ub_or_stride = stride; + load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; + } +} + +static void +emit_linear_store(struct v3dv_cl *cl, + uint32_t buffer, + struct v3dv_bo *bo, + uint32_t offset, + uint32_t stride, + bool msaa, + uint32_t format) +{ + cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { + store.buffer_to_store = RENDER_TARGET_0; + store.address = v3dv_cl_address(bo, offset); + store.clear_buffer_being_stored = false; + store.output_image_format = format; + store.memory_format = V3D_TILING_RASTER; + store.height_in_ub_or_stride = stride; + store.decimate_mode = msaa ? V3D_DECIMATE_MODE_ALL_SAMPLES : + V3D_DECIMATE_MODE_SAMPLE_0; + } +} + +/* This chooses a tile buffer format that is appropriate for the copy operation. + * Typically, this is the image render target type, however, if we are copying + * depth/stencil to/from a buffer the hardware can't do raster loads/stores, so + * we need to load and store to/from a tile color buffer using a compatible + * color format. + */ +static uint32_t +choose_tlb_format(struct framebuffer_data *framebuffer, + VkImageAspectFlags aspect, + bool for_store, + bool is_copy_to_buffer, + bool is_copy_from_buffer) +{ + if (is_copy_to_buffer || is_copy_from_buffer) { + switch (framebuffer->vk_format) { + case VK_FORMAT_D16_UNORM: + return V3D_OUTPUT_IMAGE_FORMAT_R16UI; + case VK_FORMAT_D32_SFLOAT: + return V3D_OUTPUT_IMAGE_FORMAT_R32F; + case VK_FORMAT_X8_D24_UNORM_PACK32: + return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI; + case VK_FORMAT_D24_UNORM_S8_UINT: + /* When storing the stencil aspect of a combined depth/stencil image + * to a buffer, the Vulkan spec states that the output buffer must + * have packed stencil values, so we choose an R8UI format for our + * store outputs. For the load input we still want RGBA8UI since the + * source image contains 4 channels (including the 3 channels + * containing the 24-bit depth value). + * + * When loading the stencil aspect of a combined depth/stencil image + * from a buffer, we read packed 8-bit stencil values from the buffer + * that we need to put into the LSB of the 32-bit format (the R + * channel), so we use R8UI. For the store, if we used R8UI then we + * would write 8-bit stencil values consecutively over depth channels, + * so we need to use RGBA8UI. This will write each stencil value in + * its correct position, but will overwrite depth values (channels G + * B,A) with undefined values. To fix this, we will have to restore + * the depth aspect from the Z tile buffer, which we should pre-load + * from the image before the store). + */ + if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) { + return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI; + } else { + assert(aspect & VK_IMAGE_ASPECT_STENCIL_BIT); + if (is_copy_to_buffer) { + return for_store ? V3D_OUTPUT_IMAGE_FORMAT_R8UI : + V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI; + } else { + assert(is_copy_from_buffer); + return for_store ? V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI : + V3D_OUTPUT_IMAGE_FORMAT_R8UI; + } + } + default: /* Color formats */ + return framebuffer->format->rt_type; + break; + } + } else { + return framebuffer->format->rt_type; + } +} + +static inline bool +format_needs_rb_swap(struct v3dv_device *device, + VkFormat format) +{ + const uint8_t *swizzle = v3dv_get_format_swizzle(device, format); + return swizzle[0] == PIPE_SWIZZLE_Z; +} + +static void +emit_image_load(struct v3dv_device *device, + struct v3dv_cl *cl, + struct framebuffer_data *framebuffer, + struct v3dv_image *image, + VkImageAspectFlags aspect, + uint32_t layer, + uint32_t mip_level, + bool is_copy_to_buffer, + bool is_copy_from_buffer) +{ + uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer); + + /* For image to/from buffer copies we always load to and store from RT0, + * even for depth/stencil aspects, because the hardware can't do raster + * stores or loads from/to the depth/stencil tile buffers. + */ + bool load_to_color_tlb = is_copy_to_buffer || is_copy_from_buffer || + aspect == VK_IMAGE_ASPECT_COLOR_BIT; + + const struct v3d_resource_slice *slice = &image->slices[mip_level]; + cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) { + load.buffer_to_load = load_to_color_tlb ? + RENDER_TARGET_0 : v3dX(zs_buffer_from_aspect_bits)(aspect); + + load.address = v3dv_cl_address(image->mem->bo, layer_offset); + + load.input_image_format = choose_tlb_format(framebuffer, aspect, false, + is_copy_to_buffer, + is_copy_from_buffer); + load.memory_format = slice->tiling; + + /* When copying depth/stencil images to a buffer, for D24 formats Vulkan + * expects the depth value in the LSB bits of each 32-bit pixel. + * Unfortunately, the hardware seems to put the S8/X8 bits there and the + * depth bits on the MSB. To work around that we can reverse the channel + * order and then swap the R/B channels to get what we want. + * + * NOTE: reversing and swapping only gets us the behavior we want if the + * operations happen in that exact order, which seems to be the case when + * done on the tile buffer load operations. On the store, it seems the + * order is not the same. The order on the store is probably reversed so + * that reversing and swapping on both the load and the store preserves + * the original order of the channels in memory. + * + * Notice that we only need to do this when copying to a buffer, where + * depth and stencil aspects are copied as separate regions and + * the spec expects them to be tightly packed. + */ + bool needs_rb_swap = false; + bool needs_chan_reverse = false; + if (is_copy_to_buffer && + (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 || + (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT && + (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) { + needs_rb_swap = true; + needs_chan_reverse = true; + } else if (!is_copy_from_buffer && !is_copy_to_buffer && + (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) { + /* This is not a raw data copy (i.e. we are clearing the image), + * so we need to make sure we respect the format swizzle. + */ + needs_rb_swap = format_needs_rb_swap(device, framebuffer->vk_format); + } + + load.r_b_swap = needs_rb_swap; + load.channel_reverse = needs_chan_reverse; + + if (slice->tiling == V3D_TILING_UIF_NO_XOR || + slice->tiling == V3D_TILING_UIF_XOR) { + load.height_in_ub_or_stride = + slice->padded_height_of_output_image_in_uif_blocks; + } else if (slice->tiling == V3D_TILING_RASTER) { + load.height_in_ub_or_stride = slice->stride; + } + + if (image->samples > VK_SAMPLE_COUNT_1_BIT) + load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; + else + load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; + } +} + +static void +emit_image_store(struct v3dv_device *device, + struct v3dv_cl *cl, + struct framebuffer_data *framebuffer, + struct v3dv_image *image, + VkImageAspectFlags aspect, + uint32_t layer, + uint32_t mip_level, + bool is_copy_to_buffer, + bool is_copy_from_buffer, + bool is_multisample_resolve) +{ + uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer); + + bool store_from_color_tlb = is_copy_to_buffer || is_copy_from_buffer || + aspect == VK_IMAGE_ASPECT_COLOR_BIT; + + const struct v3d_resource_slice *slice = &image->slices[mip_level]; + cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { + store.buffer_to_store = store_from_color_tlb ? + RENDER_TARGET_0 : v3dX(zs_buffer_from_aspect_bits)(aspect); + + store.address = v3dv_cl_address(image->mem->bo, layer_offset); + store.clear_buffer_being_stored = false; + + /* See rationale in emit_image_load() */ + bool needs_rb_swap = false; + bool needs_chan_reverse = false; + if (is_copy_from_buffer && + (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 || + (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT && + (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) { + needs_rb_swap = true; + needs_chan_reverse = true; + } else if (!is_copy_from_buffer && !is_copy_to_buffer && + (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) { + needs_rb_swap = format_needs_rb_swap(device, framebuffer->vk_format); + } + + store.r_b_swap = needs_rb_swap; + store.channel_reverse = needs_chan_reverse; + + store.output_image_format = choose_tlb_format(framebuffer, aspect, true, + is_copy_to_buffer, + is_copy_from_buffer); + store.memory_format = slice->tiling; + if (slice->tiling == V3D_TILING_UIF_NO_XOR || + slice->tiling == V3D_TILING_UIF_XOR) { + store.height_in_ub_or_stride = + slice->padded_height_of_output_image_in_uif_blocks; + } else if (slice->tiling == V3D_TILING_RASTER) { + store.height_in_ub_or_stride = slice->stride; + } + + if (image->samples > VK_SAMPLE_COUNT_1_BIT) + store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; + else if (is_multisample_resolve) + store.decimate_mode = V3D_DECIMATE_MODE_4X; + else + store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; + } +} + +static void +emit_copy_layer_to_buffer_per_tile_list(struct v3dv_job *job, + struct framebuffer_data *framebuffer, + struct v3dv_buffer *buffer, + struct v3dv_image *image, + uint32_t layer_offset, + const VkBufferImageCopy2KHR *region) +{ + struct v3dv_cl *cl = &job->indirect; + v3dv_cl_ensure_space(cl, 200, 1); + v3dv_return_if_oom(NULL, job); + + struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); + + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + + /* Load image to TLB */ + assert((image->type != VK_IMAGE_TYPE_3D && + layer_offset < region->imageSubresource.layerCount) || + layer_offset < image->extent.depth); + + const uint32_t image_layer = image->type != VK_IMAGE_TYPE_3D ? + region->imageSubresource.baseArrayLayer + layer_offset : + region->imageOffset.z + layer_offset; + + emit_image_load(job->device, cl, framebuffer, image, + region->imageSubresource.aspectMask, + image_layer, + region->imageSubresource.mipLevel, + true, false); + + cl_emit(cl, END_OF_LOADS, end); + + cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); + + /* Store TLB to buffer */ + uint32_t width, height; + if (region->bufferRowLength == 0) + width = region->imageExtent.width; + else + width = region->bufferRowLength; + + if (region->bufferImageHeight == 0) + height = region->imageExtent.height; + else + height = region->bufferImageHeight; + + /* Handle copy from compressed format */ + width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk_format)); + height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk_format)); + + /* If we are storing stencil from a combined depth/stencil format the + * Vulkan spec states that the output buffer must have packed stencil + * values, where each stencil value is 1 byte. + */ + uint32_t cpp = + region->imageSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ? + 1 : image->cpp; + uint32_t buffer_stride = width * cpp; + uint32_t buffer_offset = buffer->mem_offset + region->bufferOffset + + height * buffer_stride * layer_offset; + + uint32_t format = choose_tlb_format(framebuffer, + region->imageSubresource.aspectMask, + true, true, false); + bool msaa = image->samples > VK_SAMPLE_COUNT_1_BIT; + + emit_linear_store(cl, RENDER_TARGET_0, buffer->mem->bo, + buffer_offset, buffer_stride, msaa, format); + + cl_emit(cl, END_OF_TILE_MARKER, end); + + cl_emit(cl, RETURN_FROM_SUB_LIST, ret); + + cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { + branch.start = tile_list_start; + branch.end = v3dv_cl_get_address(cl); + } +} + +static void +emit_copy_layer_to_buffer(struct v3dv_job *job, + struct v3dv_buffer *buffer, + struct v3dv_image *image, + struct framebuffer_data *framebuffer, + uint32_t layer, + const VkBufferImageCopy2KHR *region) +{ + emit_frame_setup(job, layer, NULL); + emit_copy_layer_to_buffer_per_tile_list(job, framebuffer, buffer, + image, layer, region); + emit_supertile_coordinates(job, framebuffer); +} + +void +v3dX(job_emit_copy_image_to_buffer_rcl)(struct v3dv_job *job, + struct v3dv_buffer *buffer, + struct v3dv_image *image, + struct framebuffer_data *framebuffer, + const VkBufferImageCopy2KHR *region) +{ + struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL); + v3dv_return_if_oom(NULL, job); + + for (int layer = 0; layer < job->frame_tiling.layers; layer++) + emit_copy_layer_to_buffer(job, buffer, image, framebuffer, layer, region); + cl_emit(rcl, END_OF_RENDERING, end); +} + +static void +emit_resolve_image_layer_per_tile_list(struct v3dv_job *job, + struct framebuffer_data *framebuffer, + struct v3dv_image *dst, + struct v3dv_image *src, + uint32_t layer_offset, + const VkImageResolve2KHR *region) +{ + struct v3dv_cl *cl = &job->indirect; + v3dv_cl_ensure_space(cl, 200, 1); + v3dv_return_if_oom(NULL, job); + + struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); + + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + + assert((src->type != VK_IMAGE_TYPE_3D && + layer_offset < region->srcSubresource.layerCount) || + layer_offset < src->extent.depth); + + const uint32_t src_layer = src->type != VK_IMAGE_TYPE_3D ? + region->srcSubresource.baseArrayLayer + layer_offset : + region->srcOffset.z + layer_offset; + + emit_image_load(job->device, cl, framebuffer, src, + region->srcSubresource.aspectMask, + src_layer, + region->srcSubresource.mipLevel, + false, false); + + cl_emit(cl, END_OF_LOADS, end); + + cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); + + assert((dst->type != VK_IMAGE_TYPE_3D && + layer_offset < region->dstSubresource.layerCount) || + layer_offset < dst->extent.depth); + + const uint32_t dst_layer = dst->type != VK_IMAGE_TYPE_3D ? + region->dstSubresource.baseArrayLayer + layer_offset : + region->dstOffset.z + layer_offset; + + emit_image_store(job->device, cl, framebuffer, dst, + region->dstSubresource.aspectMask, + dst_layer, + region->dstSubresource.mipLevel, + false, false, true); + + cl_emit(cl, END_OF_TILE_MARKER, end); + + cl_emit(cl, RETURN_FROM_SUB_LIST, ret); + + cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { + branch.start = tile_list_start; + branch.end = v3dv_cl_get_address(cl); + } +} + +static void +emit_resolve_image_layer(struct v3dv_job *job, + struct v3dv_image *dst, + struct v3dv_image *src, + struct framebuffer_data *framebuffer, + uint32_t layer, + const VkImageResolve2KHR *region) +{ + emit_frame_setup(job, layer, NULL); + emit_resolve_image_layer_per_tile_list(job, framebuffer, + dst, src, layer, region); + emit_supertile_coordinates(job, framebuffer); +} + +void +v3dX(job_emit_resolve_image_rcl)(struct v3dv_job *job, + struct v3dv_image *dst, + struct v3dv_image *src, + struct framebuffer_data *framebuffer, + const VkImageResolve2KHR *region) +{ + struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL); + v3dv_return_if_oom(NULL, job); + + for (int layer = 0; layer < job->frame_tiling.layers; layer++) + emit_resolve_image_layer(job, dst, src, framebuffer, layer, region); + cl_emit(rcl, END_OF_RENDERING, end); +} + +static void +emit_copy_buffer_per_tile_list(struct v3dv_job *job, + struct v3dv_bo *dst, + struct v3dv_bo *src, + uint32_t dst_offset, + uint32_t src_offset, + uint32_t stride, + uint32_t format) +{ + struct v3dv_cl *cl = &job->indirect; + v3dv_cl_ensure_space(cl, 200, 1); + v3dv_return_if_oom(NULL, job); + + struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); + + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + + emit_linear_load(cl, RENDER_TARGET_0, src, src_offset, stride, format); + + cl_emit(cl, END_OF_LOADS, end); + + cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); + + emit_linear_store(cl, RENDER_TARGET_0, + dst, dst_offset, stride, false, format); + + cl_emit(cl, END_OF_TILE_MARKER, end); + + cl_emit(cl, RETURN_FROM_SUB_LIST, ret); + + cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { + branch.start = tile_list_start; + branch.end = v3dv_cl_get_address(cl); + } +} + +void +v3dX(job_emit_copy_buffer)(struct v3dv_job *job, + struct v3dv_bo *dst, + struct v3dv_bo *src, + uint32_t dst_offset, + uint32_t src_offset, + struct framebuffer_data *framebuffer, + uint32_t format, + uint32_t item_size) +{ + const uint32_t stride = job->frame_tiling.width * item_size; + emit_copy_buffer_per_tile_list(job, dst, src, + dst_offset, src_offset, + stride, format); + emit_supertile_coordinates(job, framebuffer); +} + +void +v3dX(job_emit_copy_buffer_rcl)(struct v3dv_job *job, + struct v3dv_bo *dst, + struct v3dv_bo *src, + uint32_t dst_offset, + uint32_t src_offset, + struct framebuffer_data *framebuffer, + uint32_t format, + uint32_t item_size) +{ + struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL); + v3dv_return_if_oom(NULL, job); + + emit_frame_setup(job, 0, NULL); + + v3dX(job_emit_copy_buffer)(job, dst, src, dst_offset, src_offset, + framebuffer, format, item_size); + + cl_emit(rcl, END_OF_RENDERING, end); +} + +static void +emit_copy_image_layer_per_tile_list(struct v3dv_job *job, + struct framebuffer_data *framebuffer, + struct v3dv_image *dst, + struct v3dv_image *src, + uint32_t layer_offset, + const VkImageCopy2KHR *region) +{ + struct v3dv_cl *cl = &job->indirect; + v3dv_cl_ensure_space(cl, 200, 1); + v3dv_return_if_oom(NULL, job); + + struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); + + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + + assert((src->type != VK_IMAGE_TYPE_3D && + layer_offset < region->srcSubresource.layerCount) || + layer_offset < src->extent.depth); + + const uint32_t src_layer = src->type != VK_IMAGE_TYPE_3D ? + region->srcSubresource.baseArrayLayer + layer_offset : + region->srcOffset.z + layer_offset; + + emit_image_load(job->device, cl, framebuffer, src, + region->srcSubresource.aspectMask, + src_layer, + region->srcSubresource.mipLevel, + false, false); + + cl_emit(cl, END_OF_LOADS, end); + + cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); + + assert((dst->type != VK_IMAGE_TYPE_3D && + layer_offset < region->dstSubresource.layerCount) || + layer_offset < dst->extent.depth); + + const uint32_t dst_layer = dst->type != VK_IMAGE_TYPE_3D ? + region->dstSubresource.baseArrayLayer + layer_offset : + region->dstOffset.z + layer_offset; + + emit_image_store(job->device, cl, framebuffer, dst, + region->dstSubresource.aspectMask, + dst_layer, + region->dstSubresource.mipLevel, + false, false, false); + + cl_emit(cl, END_OF_TILE_MARKER, end); + + cl_emit(cl, RETURN_FROM_SUB_LIST, ret); + + cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { + branch.start = tile_list_start; + branch.end = v3dv_cl_get_address(cl); + } +} + +static void +emit_copy_image_layer(struct v3dv_job *job, + struct v3dv_image *dst, + struct v3dv_image *src, + struct framebuffer_data *framebuffer, + uint32_t layer, + const VkImageCopy2KHR *region) +{ + emit_frame_setup(job, layer, NULL); + emit_copy_image_layer_per_tile_list(job, framebuffer, dst, src, layer, region); + emit_supertile_coordinates(job, framebuffer); +} + +void +v3dX(job_emit_copy_image_rcl)(struct v3dv_job *job, + struct v3dv_image *dst, + struct v3dv_image *src, + struct framebuffer_data *framebuffer, + const VkImageCopy2KHR *region) +{ + struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL); + v3dv_return_if_oom(NULL, job); + + for (int layer = 0; layer < job->frame_tiling.layers; layer++) + emit_copy_image_layer(job, dst, src, framebuffer, layer, region); + cl_emit(rcl, END_OF_RENDERING, end); +} + +void +v3dX(cmd_buffer_emit_tfu_job)(struct v3dv_cmd_buffer *cmd_buffer, + struct v3dv_image *dst, + uint32_t dst_mip_level, + uint32_t dst_layer, + struct v3dv_image *src, + uint32_t src_mip_level, + uint32_t src_layer, + uint32_t width, + uint32_t height, + const struct v3dv_format *format) +{ + const struct v3d_resource_slice *src_slice = &src->slices[src_mip_level]; + const struct v3d_resource_slice *dst_slice = &dst->slices[dst_mip_level]; + + assert(dst->mem && dst->mem->bo); + const struct v3dv_bo *dst_bo = dst->mem->bo; + + assert(src->mem && src->mem->bo); + const struct v3dv_bo *src_bo = src->mem->bo; + + struct drm_v3d_submit_tfu tfu = { + .ios = (height << 16) | width, + .bo_handles = { + dst_bo->handle, + src_bo->handle != dst_bo->handle ? src_bo->handle : 0 + }, + }; + + const uint32_t src_offset = + src_bo->offset + v3dv_layer_offset(src, src_mip_level, src_layer); + tfu.iia |= src_offset; + + uint32_t icfg; + if (src_slice->tiling == V3D_TILING_RASTER) { + icfg = V3D_TFU_ICFG_FORMAT_RASTER; + } else { + icfg = V3D_TFU_ICFG_FORMAT_LINEARTILE + + (src_slice->tiling - V3D_TILING_LINEARTILE); + } + tfu.icfg |= icfg << V3D_TFU_ICFG_FORMAT_SHIFT; + + const uint32_t dst_offset = + dst_bo->offset + v3dv_layer_offset(dst, dst_mip_level, dst_layer); + tfu.ioa |= dst_offset; + + tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE + + (dst_slice->tiling - V3D_TILING_LINEARTILE)) << + V3D_TFU_IOA_FORMAT_SHIFT; + tfu.icfg |= format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT; + + switch (src_slice->tiling) { + case V3D_TILING_UIF_NO_XOR: + case V3D_TILING_UIF_XOR: + tfu.iis |= src_slice->padded_height / (2 * v3d_utile_height(src->cpp)); + break; + case V3D_TILING_RASTER: + tfu.iis |= src_slice->stride / src->cpp; + break; + default: + break; + } + + /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the + * OPAD field for the destination (how many extra UIF blocks beyond + * those necessary to cover the height). + */ + if (dst_slice->tiling == V3D_TILING_UIF_NO_XOR || + dst_slice->tiling == V3D_TILING_UIF_XOR) { + uint32_t uif_block_h = 2 * v3d_utile_height(dst->cpp); + uint32_t implicit_padded_height = align(height, uif_block_h); + uint32_t icfg = + (dst_slice->padded_height - implicit_padded_height) / uif_block_h; + tfu.icfg |= icfg << V3D_TFU_ICFG_OPAD_SHIFT; + } + + v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu); +} + +static void +emit_clear_image_per_tile_list(struct v3dv_job *job, + struct framebuffer_data *framebuffer, + struct v3dv_image *image, + VkImageAspectFlags aspects, + uint32_t layer, + uint32_t level) +{ + struct v3dv_cl *cl = &job->indirect; + v3dv_cl_ensure_space(cl, 200, 1); + v3dv_return_if_oom(NULL, job); + + struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); + + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + + cl_emit(cl, END_OF_LOADS, end); + + cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); + + emit_image_store(job->device, cl, framebuffer, image, aspects, + layer, level, false, false, false); + + cl_emit(cl, END_OF_TILE_MARKER, end); + + cl_emit(cl, RETURN_FROM_SUB_LIST, ret); + + cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { + branch.start = tile_list_start; + branch.end = v3dv_cl_get_address(cl); + } +} + +static void +emit_clear_image(struct v3dv_job *job, + struct v3dv_image *image, + struct framebuffer_data *framebuffer, + VkImageAspectFlags aspects, + uint32_t layer, + uint32_t level) +{ + emit_clear_image_per_tile_list(job, framebuffer, image, aspects, layer, level); + emit_supertile_coordinates(job, framebuffer); +} + +void +v3dX(job_emit_clear_image_rcl)(struct v3dv_job *job, + struct v3dv_image *image, + struct framebuffer_data *framebuffer, + const union v3dv_clear_value *clear_value, + VkImageAspectFlags aspects, + uint32_t layer, + uint32_t level) +{ + const struct rcl_clear_info clear_info = { + .clear_value = clear_value, + .image = image, + .aspects = aspects, + .layer = layer, + .level = level, + }; + + struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info); + v3dv_return_if_oom(NULL, job); + + emit_frame_setup(job, 0, clear_value); + emit_clear_image(job, image, framebuffer, aspects, layer, level); + cl_emit(rcl, END_OF_RENDERING, end); +} + +static void +emit_fill_buffer_per_tile_list(struct v3dv_job *job, + struct v3dv_bo *bo, + uint32_t offset, + uint32_t stride) +{ + struct v3dv_cl *cl = &job->indirect; + v3dv_cl_ensure_space(cl, 200, 1); + v3dv_return_if_oom(NULL, job); + + struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); + + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + + cl_emit(cl, END_OF_LOADS, end); + + cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); + + emit_linear_store(cl, RENDER_TARGET_0, bo, offset, stride, false, + V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI); + + cl_emit(cl, END_OF_TILE_MARKER, end); + + cl_emit(cl, RETURN_FROM_SUB_LIST, ret); + + cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { + branch.start = tile_list_start; + branch.end = v3dv_cl_get_address(cl); + } +} + +static void +emit_fill_buffer(struct v3dv_job *job, + struct v3dv_bo *bo, + uint32_t offset, + struct framebuffer_data *framebuffer) +{ + const uint32_t stride = job->frame_tiling.width * 4; + emit_fill_buffer_per_tile_list(job, bo, offset, stride); + emit_supertile_coordinates(job, framebuffer); +} + +void +v3dX(job_emit_fill_buffer_rcl)(struct v3dv_job *job, + struct v3dv_bo *bo, + uint32_t offset, + struct framebuffer_data *framebuffer, + uint32_t data) +{ + const union v3dv_clear_value clear_value = { + .color = { data, 0, 0, 0 }, + }; + + const struct rcl_clear_info clear_info = { + .clear_value = &clear_value, + .image = NULL, + .aspects = VK_IMAGE_ASPECT_COLOR_BIT, + .layer = 0, + .level = 0, + }; + + struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info); + v3dv_return_if_oom(NULL, job); + + emit_frame_setup(job, 0, &clear_value); + emit_fill_buffer(job, bo, offset, framebuffer); + cl_emit(rcl, END_OF_RENDERING, end); +} + + +static void +emit_copy_buffer_to_layer_per_tile_list(struct v3dv_job *job, + struct framebuffer_data *framebuffer, + struct v3dv_image *image, + struct v3dv_buffer *buffer, + uint32_t layer, + const VkBufferImageCopy2KHR *region) +{ + struct v3dv_cl *cl = &job->indirect; + v3dv_cl_ensure_space(cl, 200, 1); + v3dv_return_if_oom(NULL, job); + + struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); + + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + + const VkImageSubresourceLayers *imgrsc = ®ion->imageSubresource; + assert((image->type != VK_IMAGE_TYPE_3D && layer < imgrsc->layerCount) || + layer < image->extent.depth); + + /* Load TLB from buffer */ + uint32_t width, height; + if (region->bufferRowLength == 0) + width = region->imageExtent.width; + else + width = region->bufferRowLength; + + if (region->bufferImageHeight == 0) + height = region->imageExtent.height; + else + height = region->bufferImageHeight; + + /* Handle copy to compressed format using a compatible format */ + width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk_format)); + height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk_format)); + + uint32_t cpp = imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ? + 1 : image->cpp; + uint32_t buffer_stride = width * cpp; + uint32_t buffer_offset = + buffer->mem_offset + region->bufferOffset + height * buffer_stride * layer; + + uint32_t format = choose_tlb_format(framebuffer, imgrsc->aspectMask, + false, false, true); + + emit_linear_load(cl, RENDER_TARGET_0, buffer->mem->bo, + buffer_offset, buffer_stride, format); + + /* Because we can't do raster loads/stores of Z/S formats we need to + * use a color tile buffer with a compatible RGBA color format instead. + * However, when we are uploading a single aspect to a combined + * depth/stencil image we have the problem that our tile buffer stores don't + * allow us to mask out the other aspect, so we always write all four RGBA + * channels to the image and we end up overwriting that other aspect with + * undefined values. To work around that, we first load the aspect we are + * not copying from the image memory into a proper Z/S tile buffer. Then we + * do our store from the color buffer for the aspect we are copying, and + * after that, we do another store from the Z/S tile buffer to restore the + * other aspect to its original value. + */ + if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) { + if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { + emit_image_load(job->device, cl, framebuffer, image, + VK_IMAGE_ASPECT_STENCIL_BIT, + imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, + false, false); + } else { + assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT); + emit_image_load(job->device, cl, framebuffer, image, + VK_IMAGE_ASPECT_DEPTH_BIT, + imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, + false, false); + } + } + + cl_emit(cl, END_OF_LOADS, end); + + cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); + + /* Store TLB to image */ + emit_image_store(job->device, cl, framebuffer, image, imgrsc->aspectMask, + imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, + false, true, false); + + if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) { + if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { + emit_image_store(job->device, cl, framebuffer, image, + VK_IMAGE_ASPECT_STENCIL_BIT, + imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, + false, false, false); + } else { + assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT); + emit_image_store(job->device, cl, framebuffer, image, + VK_IMAGE_ASPECT_DEPTH_BIT, + imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, + false, false, false); + } + } + + cl_emit(cl, END_OF_TILE_MARKER, end); + + cl_emit(cl, RETURN_FROM_SUB_LIST, ret); + + cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { + branch.start = tile_list_start; + branch.end = v3dv_cl_get_address(cl); + } +} + +static void +emit_copy_buffer_to_layer(struct v3dv_job *job, + struct v3dv_image *image, + struct v3dv_buffer *buffer, + struct framebuffer_data *framebuffer, + uint32_t layer, + const VkBufferImageCopy2KHR *region) +{ + emit_frame_setup(job, layer, NULL); + emit_copy_buffer_to_layer_per_tile_list(job, framebuffer, image, buffer, + layer, region); + emit_supertile_coordinates(job, framebuffer); +} + +void +v3dX(job_emit_copy_buffer_to_image_rcl)(struct v3dv_job *job, + struct v3dv_image *image, + struct v3dv_buffer *buffer, + struct framebuffer_data *framebuffer, + const VkBufferImageCopy2KHR *region) +{ + struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL); + v3dv_return_if_oom(NULL, job); + + for (int layer = 0; layer < job->frame_tiling.layers; layer++) + emit_copy_buffer_to_layer(job, image, buffer, framebuffer, layer, region); + cl_emit(rcl, END_OF_RENDERING, end); +} + +/* Figure out a TLB size configuration for a number of pixels to process. + * Beware that we can't "render" more than 4096x4096 pixels in a single job, + * if the pixel count is larger than this, the caller might need to split + * the job and call this function multiple times. + */ +static void +framebuffer_size_for_pixel_count(uint32_t num_pixels, + uint32_t *width, + uint32_t *height) +{ + assert(num_pixels > 0); + + const uint32_t max_dim_pixels = 4096; + const uint32_t max_pixels = max_dim_pixels * max_dim_pixels; + + uint32_t w, h; + if (num_pixels > max_pixels) { + w = max_dim_pixels; + h = max_dim_pixels; + } else { + w = num_pixels; + h = 1; + while (w > max_dim_pixels || ((w % 2) == 0 && w > 2 * h)) { + w >>= 1; + h <<= 1; + } + } + assert(w <= max_dim_pixels && h <= max_dim_pixels); + assert(w * h <= num_pixels); + assert(w > 0 && h > 0); + + *width = w; + *height = h; +} + +struct v3dv_job * +v3dX(cmd_buffer_copy_buffer)(struct v3dv_cmd_buffer *cmd_buffer, + struct v3dv_bo *dst, + uint32_t dst_offset, + struct v3dv_bo *src, + uint32_t src_offset, + const VkBufferCopy2KHR *region) +{ + const uint32_t internal_bpp = V3D_INTERNAL_BPP_32; + const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI; + + /* Select appropriate pixel format for the copy operation based on the + * size to copy and the alignment of the source and destination offsets. + */ + src_offset += region->srcOffset; + dst_offset += region->dstOffset; + uint32_t item_size = 4; + while (item_size > 1 && + (src_offset % item_size != 0 || dst_offset % item_size != 0)) { + item_size /= 2; + } + + while (item_size > 1 && region->size % item_size != 0) + item_size /= 2; + + assert(region->size % item_size == 0); + uint32_t num_items = region->size / item_size; + assert(num_items > 0); + + uint32_t format; + VkFormat vk_format; + switch (item_size) { + case 4: + format = V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI; + vk_format = VK_FORMAT_R8G8B8A8_UINT; + break; + case 2: + format = V3D_OUTPUT_IMAGE_FORMAT_RG8UI; + vk_format = VK_FORMAT_R8G8_UINT; + break; + default: + format = V3D_OUTPUT_IMAGE_FORMAT_R8UI; + vk_format = VK_FORMAT_R8_UINT; + break; + } + + struct v3dv_job *job = NULL; + while (num_items > 0) { + job = v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL); + if (!job) + return NULL; + + uint32_t width, height; + framebuffer_size_for_pixel_count(num_items, &width, &height); + + v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp, false); + + struct framebuffer_data framebuffer; + v3dX(setup_framebuffer_data)(&framebuffer, vk_format, internal_type, + &job->frame_tiling); + + v3dX(job_emit_binning_flush)(job); + + v3dX(job_emit_copy_buffer_rcl)(job, dst, src, dst_offset, src_offset, + &framebuffer, format, item_size); + + v3dv_cmd_buffer_finish_job(cmd_buffer); + + const uint32_t items_copied = width * height; + const uint32_t bytes_copied = items_copied * item_size; + num_items -= items_copied; + src_offset += bytes_copied; + dst_offset += bytes_copied; + } + + return job; +} + +void +v3dX(cmd_buffer_fill_buffer)(struct v3dv_cmd_buffer *cmd_buffer, + struct v3dv_bo *bo, + uint32_t offset, + uint32_t size, + uint32_t data) +{ + assert(size > 0 && size % 4 == 0); + assert(offset + size <= bo->size); + + const uint32_t internal_bpp = V3D_INTERNAL_BPP_32; + const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI; + uint32_t num_items = size / 4; + + while (num_items > 0) { + struct v3dv_job *job = + v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL); + if (!job) + return; + + uint32_t width, height; + framebuffer_size_for_pixel_count(num_items, &width, &height); + + v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp, false); + + struct framebuffer_data framebuffer; + v3dX(setup_framebuffer_data)(&framebuffer, VK_FORMAT_R8G8B8A8_UINT, + internal_type, &job->frame_tiling); + + v3dX(job_emit_binning_flush)(job); + + v3dX(job_emit_fill_buffer_rcl)(job, bo, offset, &framebuffer, data); + + v3dv_cmd_buffer_finish_job(cmd_buffer); + + const uint32_t items_copied = width * height; + const uint32_t bytes_copied = items_copied * 4; + num_items -= items_copied; + offset += bytes_copied; + } +} + +void +v3dX(setup_framebuffer_data)(struct framebuffer_data *fb, + VkFormat vk_format, + uint32_t internal_type, + const struct v3dv_frame_tiling *tiling) +{ + fb->internal_type = internal_type; + + /* Supertile coverage always starts at 0,0 */ + uint32_t supertile_w_in_pixels = + tiling->tile_width * tiling->supertile_width; + uint32_t supertile_h_in_pixels = + tiling->tile_height * tiling->supertile_height; + + fb->min_x_supertile = 0; + fb->min_y_supertile = 0; + fb->max_x_supertile = (tiling->width - 1) / supertile_w_in_pixels; + fb->max_y_supertile = (tiling->height - 1) / supertile_h_in_pixels; + + fb->vk_format = vk_format; + fb->format = v3dX(get_format)(vk_format); + + fb->internal_depth_type = V3D_INTERNAL_TYPE_DEPTH_32F; + if (vk_format_is_depth_or_stencil(vk_format)) + fb->internal_depth_type = v3dX(get_internal_depth_type)(vk_format); +} |