Diffstat (limited to 'src/broadcom/vulkan/v3dv_cmd_buffer.c')
-rw-r--r--  src/broadcom/vulkan/v3dv_cmd_buffer.c  233
1 file changed, 159 insertions(+), 74 deletions(-)
diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c
index 75845de803f..75977333389 100644
--- a/src/broadcom/vulkan/v3dv_cmd_buffer.c
+++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c
@@ -53,6 +53,9 @@ subpass_start(struct v3dv_cmd_buffer *cmd_buffer);
static void
subpass_finish(struct v3dv_cmd_buffer *cmd_buffer);
+static void
+emit_rcl(struct v3dv_cmd_buffer *cmd_buffer);
+
VkResult
v3dv_CreateCommandPool(VkDevice _device,
const VkCommandPoolCreateInfo *pCreateInfo,
@@ -167,6 +170,79 @@ emit_binning_flush(struct v3dv_job *job)
cl_emit(&job->bcl, FLUSH, flush);
}
+static bool
+attachment_list_is_subset(struct v3dv_subpass_attachment *l1, uint32_t l1_count,
+ struct v3dv_subpass_attachment *l2, uint32_t l2_count)
+{
+ for (uint32_t i = 0; i < l1_count; i++) {
+ uint32_t attachment_idx = l1[i].attachment;
+ if (attachment_idx == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ uint32_t j;
+ for (j = 0; j < l2_count; j++) {
+ if (l2[j].attachment == attachment_idx)
+ break;
+ }
+ if (j == l2_count)
+ return false;
+ }
+
+ return true;
+}
+
+static bool
+cmd_buffer_can_merge_subpass(struct v3dv_cmd_buffer *cmd_buffer)
+{
+ const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
+ assert(state->pass);
+
+ const struct v3dv_physical_device *physical_device =
+ &cmd_buffer->device->instance->physicalDevice;
+
+ if (!physical_device->options.merge_jobs)
+ return false;
+
+ /* Each render pass starts a new job */
+ if (state->subpass_idx == 0)
+ return false;
+
+ /* Two subpasses can be merged in the same job if we can emit a single RCL
+ * for them (since the RCL includes the END_OF_RENDERING command that
+ * triggers the "render job finished" interrupt). We can do this so long
+ * as both subpasses render against the same attachments.
+ */
+ uint32_t prev_subpass_idx = state->subpass_idx - 1;
+ struct v3dv_subpass *prev_subpass = &state->pass->subpasses[prev_subpass_idx];
+ struct v3dv_subpass *subpass = &state->pass->subpasses[state->subpass_idx];
+
+ /* Because the list of subpass attachments can include VK_ATTACHMENT_UNUSED,
+ * we need to check that for each subpass all its used attachments are
+ * used by the other subpass.
+ */
+ bool compatible =
+ attachment_list_is_subset(prev_subpass->color_attachments,
+ prev_subpass->color_count,
+ subpass->color_attachments,
+ subpass->color_count);
+ if (!compatible)
+ return false;
+
+ compatible =
+ attachment_list_is_subset(subpass->color_attachments,
+ subpass->color_count,
+ prev_subpass->color_attachments,
+ prev_subpass->color_count);
+ if (!compatible)
+ return false;
+
+ /* FIXME: resolve attachments */
+
+ /* FIXME: also check depth/stencil attachment */
+
+ return true;
+}
+
void
v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer)
{
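To illustrate the mutual subset check above: because attachment_list_is_subset() skips VK_ATTACHMENT_UNUSED entries, two attachment lists of different lengths can still be merge-compatible. A minimal sketch with hypothetical attachment indices (not part of the patch):

/* Subpass A renders to attachments 0 and 1. */
struct v3dv_subpass_attachment a_color[] = {
   { .attachment = 0 }, { .attachment = 1 },
};

/* Subpass B has an unused slot but uses the same attachments. */
struct v3dv_subpass_attachment b_color[] = {
   { .attachment = 0 },
   { .attachment = VK_ATTACHMENT_UNUSED },
   { .attachment = 1 },
};

/* Both directions pass, so the color attachment checks in
 * cmd_buffer_can_merge_subpass() allow the merge:
 *
 *   attachment_list_is_subset(a_color, 2, b_color, 3) -> true
 *   attachment_list_is_subset(b_color, 3, a_color, 2) -> true
 */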
@@ -174,6 +250,17 @@ v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer)
assert(job);
assert(v3dv_cl_offset(&job->bcl) != 0);
+ /* When we merge multiple subpasses into the same job we must only emit one
+ * RCL, so we do that here, once we have decided that we need to finish the
+ * job. Any rendering that happens outside a render pass is never merged, so
+ * the RCL should have been emitted by the time we get here.
+ */
+ assert(v3dv_cl_offset(&job->rcl) != 0 || cmd_buffer->state.pass);
+ if (cmd_buffer->state.pass) {
+ emit_rcl(cmd_buffer);
+ emit_binning_flush(job);
+ }
+
list_addtail(&job->list_link, &cmd_buffer->submit_jobs);
cmd_buffer->state.job = NULL;
}
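With this change the RCL for a merged job is emitted exactly once, when the job is finished. A sketch of the resulting recording flow for a two-subpass render pass, assuming the subpasses share attachments and merge_jobs is enabled (illustrative, not part of the patch):

/*
 *   vkCmdBeginRenderPass   subpass_start(): v3dv_cmd_buffer_start_job()
 *                          creates a new job and emits the binning setup.
 *   vkCmdNextSubpass       subpass_start(): cmd_buffer_can_merge_subpass()
 *                          returns true, so start_job() returns the same
 *                          job and the binning setup is skipped.
 *   vkCmdEndRenderPass     subpass_finish(); v3dv_cmd_buffer_finish_job()
 *                          emits the single RCL and the binning flush for
 *                          both subpasses.
 */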
@@ -181,6 +268,12 @@ v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer)
struct v3dv_job *
v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer)
{
+ /* Don't create a new job if we can merge the current subpass into
+ * the current job.
+ */
+ if (cmd_buffer->state.pass && cmd_buffer_can_merge_subpass(cmd_buffer))
+ return cmd_buffer->state.job;
+
/* Ensure we are not starting a new job without finishing a previous one */
if (cmd_buffer->state.job != NULL)
v3dv_cmd_buffer_finish_job(cmd_buffer);
@@ -206,6 +299,13 @@ v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer)
v3dv_cl_init(job, &job->indirect);
v3dv_cl_begin(&job->indirect);
+ /* Keep track of the first subpass that we are recording in this new job.
+ * We will use this when we emit the RCL to decide how to emit our loads
+ * and stores.
+ */
+ if (cmd_buffer->state.pass)
+ job->first_subpass = cmd_buffer->state.subpass_idx;
+
cmd_buffer->state.job = job;
return job;
}
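The first_subpass field lets the load/store logic below treat a merged job as if it all belonged to the job's first recorded subpass. A hypothetical sketch of the resulting invariants (names as in the driver, flow abbreviated):

struct v3dv_job *job = v3dv_cmd_buffer_start_job(cmd_buffer); /* subpass 0 */
assert(job->first_subpass == 0);

/* ... record subpass 0, then advance to subpass 1 ... */

struct v3dv_job *next = v3dv_cmd_buffer_start_job(cmd_buffer);
if (next == job) {
   /* The subpasses were merged: first_subpass still says 0, so the RCL
    * loads/stores for the whole job are decided as for subpass 0. */
   assert(next->first_subpass == 0);
}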
@@ -601,7 +701,7 @@ emit_loads(struct v3dv_cmd_buffer *cmd_buffer,
bool needs_load =
attachment->desc.loadOp == VK_ATTACHMENT_LOAD_OP_LOAD ||
(attachment->desc.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR &&
- state->subpass_idx > attachment_state->first_subpass);
+ state->job->first_subpass > attachment_state->first_subpass);
if (needs_load) {
struct v3dv_image_view *iview = framebuffer->attachments[attachment_idx];
@@ -677,7 +777,7 @@ emit_stores(struct v3dv_cmd_buffer *cmd_buffer,
/* Only clear once on the first subpass that uses the attachment */
bool needs_clear =
attachment->desc.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR &&
- state->attachments[attachment_idx].first_subpass == state->subpass_idx;
+ state->attachments[attachment_idx].first_subpass == state->job->first_subpass;
store_general(cmd_buffer, cl,
attachment_idx, layer, RENDER_TARGET_0 + i, needs_clear);
has_stores = true;
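A worked example of the two conditions above, for a hypothetical attachment with loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR that is first used in subpass 1 (attachment_state->first_subpass == 1):

/* Job merging subpasses 1 and 2: job->first_subpass == 1.
 *   needs_load  = CLEAR && (1 > 1)  -> false (no redundant load)
 *   needs_clear = CLEAR && (1 == 1) -> true  (clear handled by the stores)
 *
 * A separate, later job containing only subpass 2: job->first_subpass == 2.
 *   needs_load  = CLEAR && (2 > 1)  -> true  (reload the cleared contents)
 *   needs_clear = CLEAR && (1 == 2) -> false (never clear twice)
 */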
@@ -979,83 +1079,77 @@ subpass_start(struct v3dv_cmd_buffer *cmd_buffer)
assert(state->subpass_idx < state->pass->subpass_count);
- /* FIXME: for now, each subpass goes into a separate job. In the future we
- * might be able to merge subpasses that render to the same render targets
- * so long as they don't render to more than 4 color attachments and there
- * aren't other subpass dependencies preventing this.
- */
struct v3dv_job *job = v3dv_cmd_buffer_start_job(cmd_buffer);
- const struct v3dv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
+ /* If we are starting a new job we need to setup binning. */
+ if (job->first_subpass == state->subpass_idx) {
+ const struct v3dv_framebuffer *framebuffer =
+ cmd_buffer->state.framebuffer;
- /* Setup binning for this subpass.
- *
- * FIXME: For now we do this at the start of each subpass but if we implement
- * subpass merges in the future we would only want to emit this once per job.
- */
- v3dv_cl_ensure_space_with_branch(&job->bcl, 256);
+ v3dv_cl_ensure_space_with_branch(&job->bcl, 256);
- /* The PTB will request the tile alloc initial size per tile at start
- * of tile binning.
- */
- const uint32_t fb_layers = 1; /* FIXME */
- uint32_t tile_alloc_size = 64 * MAX2(fb_layers, 1) *
- framebuffer->draw_tiles_x *
- framebuffer->draw_tiles_y;
+ /* The PTB will request the tile alloc initial size per tile at start
+ * of tile binning.
+ */
+ const uint32_t fb_layers = 1; /* FIXME */
+ uint32_t tile_alloc_size = 64 * MAX2(fb_layers, 1) *
+ framebuffer->draw_tiles_x *
+ framebuffer->draw_tiles_y;
- /* The PTB allocates in aligned 4k chunks after the initial setup. */
- tile_alloc_size = align(tile_alloc_size, 4096);
+ /* The PTB allocates in aligned 4k chunks after the initial setup. */
+ tile_alloc_size = align(tile_alloc_size, 4096);
- /* Include the first two chunk allocations that the PTB does so that
- * we definitely clear the OOM condition before triggering one (the HW
- * won't trigger OOM during the first allocations).
- */
- tile_alloc_size += 8192;
+ /* Include the first two chunk allocations that the PTB does so that
+ * we definitely clear the OOM condition before triggering one (the HW
+ * won't trigger OOM during the first allocations).
+ */
+ tile_alloc_size += 8192;
- /* For performance, allocate some extra initial memory after the PTB's
- * minimal allocations, so that we hopefully don't have to block the
- * GPU on the kernel handling an OOM signal.
- */
- tile_alloc_size += 512 * 1024;
+ /* For performance, allocate some extra initial memory after the PTB's
+ * minimal allocations, so that we hopefully don't have to block the
+ * GPU on the kernel handling an OOM signal.
+ */
+ tile_alloc_size += 512 * 1024;
- job->tile_alloc = v3dv_bo_alloc(cmd_buffer->device, tile_alloc_size);
- v3dv_job_add_bo(job, job->tile_alloc);
+ job->tile_alloc = v3dv_bo_alloc(cmd_buffer->device, tile_alloc_size);
+ v3dv_job_add_bo(job, job->tile_alloc);
- const uint32_t tsda_per_tile_size = 256;
- const uint32_t tile_state_size = MAX2(fb_layers, 1) *
- framebuffer->draw_tiles_x *
- framebuffer->draw_tiles_y *
- tsda_per_tile_size;
- job->tile_state = v3dv_bo_alloc(cmd_buffer->device, tile_state_size);
- v3dv_job_add_bo(job, job->tile_state);
+ const uint32_t tsda_per_tile_size = 256;
+ const uint32_t tile_state_size = MAX2(fb_layers, 1) *
+ framebuffer->draw_tiles_x *
+ framebuffer->draw_tiles_y *
+ tsda_per_tile_size;
+ job->tile_state = v3dv_bo_alloc(cmd_buffer->device, tile_state_size);
+ v3dv_job_add_bo(job, job->tile_state);
- /* This must go before the binning mode configuration. It is
- * required for layered framebuffers to work.
- */
- if (fb_layers > 0) {
- cl_emit(&job->bcl, NUMBER_OF_LAYERS, config) {
- config.number_of_layers = fb_layers;
+ /* This must go before the binning mode configuration. It is
+ * required for layered framebuffers to work.
+ */
+ if (fb_layers > 0) {
+ cl_emit(&job->bcl, NUMBER_OF_LAYERS, config) {
+ config.number_of_layers = fb_layers;
+ }
}
- }
- cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) {
- config.width_in_pixels = framebuffer->width;
- config.height_in_pixels = framebuffer->height;
- config.number_of_render_targets = MAX2(framebuffer->attachment_count, 1);
- config.multisample_mode_4x = false; /* FIXME */
- config.maximum_bpp_of_all_render_targets = framebuffer->internal_bpp;
- }
+ cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) {
+ config.width_in_pixels = framebuffer->width;
+ config.height_in_pixels = framebuffer->height;
+ config.number_of_render_targets = MAX2(framebuffer->attachment_count, 1);
+ config.multisample_mode_4x = false; /* FIXME */
+ config.maximum_bpp_of_all_render_targets = framebuffer->internal_bpp;
+ }
- /* There's definitely nothing in the VCD cache we want. */
- cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin);
+ /* There's definitely nothing in the VCD cache we want. */
+ cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin);
- /* Disable any leftover OQ state from another job. */
- cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter);
+ /* Disable any leftover OQ state from another job. */
+ cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter);
- /* "Binning mode lists must have a Start Tile Binning item (6) after
- * any prefix state data before the binning list proper starts."
- */
- cl_emit(&job->bcl, START_TILE_BINNING, bin);
+ /* "Binning mode lists must have a Start Tile Binning item (6) after
+ * any prefix state data before the binning list proper starts."
+ */
+ cl_emit(&job->bcl, START_TILE_BINNING, bin);
+ }
/* If we don't have a scissor or viewport defined let's just use the render
* area as clip_window, as that would be required for a clear in any
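To make the sizing above concrete, a worked example assuming a single-layer 1920x1080 framebuffer binned as 30x17 draw tiles (the actual tile counts depend on the tile dimensions, which vary with internal bpp and MSAA):

/*   tile_alloc_size = 64 * 1 * 30 * 17        =  32640 bytes
 *   align(32640, 4096)                        =  32768
 *   + 8192 (first two PTB chunk allocations)  =  40960
 *   + 512 * 1024 (extra to avoid OOM stalls)  = 565248 bytes (~552 KiB)
 *
 *   tile_state_size = 1 * 30 * 17 * 256       = 130560 bytes
 */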
@@ -1090,16 +1184,6 @@ subpass_finish(struct v3dv_cmd_buffer *cmd_buffer)
{
struct v3dv_job *job = cmd_buffer->state.job;
assert(job);
-
- emit_rcl(cmd_buffer);
-
- /* This finishes the binning job.
- *
- * FIXME: if the next subpass draws to the same RTs, we could skip this
- * and the binning setup for the next subpass.
- */
- emit_binning_flush(job);
- v3dv_cmd_buffer_finish_job(cmd_buffer);
}
void
@@ -1111,6 +1195,7 @@ v3dv_CmdEndRenderPass(VkCommandBuffer commandBuffer)
struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
assert(state->subpass_idx == state->pass->subpass_count - 1);
subpass_finish(cmd_buffer);
+ v3dv_cmd_buffer_finish_job(cmd_buffer);
/* We are no longer inside a render pass */
state->pass = NULL;