diff options
Diffstat (limited to 'src/broadcom/vulkan/v3dv_cmd_buffer.c')
-rw-r--r-- | src/broadcom/vulkan/v3dv_cmd_buffer.c | 233 |
1 files changed, 159 insertions, 74 deletions
diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c index 75845de803f..75977333389 100644 --- a/src/broadcom/vulkan/v3dv_cmd_buffer.c +++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c @@ -53,6 +53,9 @@ subpass_start(struct v3dv_cmd_buffer *cmd_buffer); static void subpass_finish(struct v3dv_cmd_buffer *cmd_buffer); +static void +emit_rcl(struct v3dv_cmd_buffer *cmd_buffer); + VkResult v3dv_CreateCommandPool(VkDevice _device, const VkCommandPoolCreateInfo *pCreateInfo, @@ -167,6 +170,79 @@ emit_binning_flush(struct v3dv_job *job) cl_emit(&job->bcl, FLUSH, flush); } +static bool +attachment_list_is_subset(struct v3dv_subpass_attachment *l1, uint32_t l1_count, + struct v3dv_subpass_attachment *l2, uint32_t l2_count) +{ + for (uint32_t i = 0; i < l1_count; i++) { + uint32_t attachment_idx = l1[i].attachment; + if (attachment_idx == VK_ATTACHMENT_UNUSED) + continue; + + uint32_t j; + for (j = 0; j < l2_count; j++) { + if (l2[j].attachment == attachment_idx) + break; + } + if (j == l2_count) + return false; + } + + return true; + } + +static bool +cmd_buffer_can_merge_subpass(struct v3dv_cmd_buffer *cmd_buffer) +{ + const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; + assert(state->pass); + + const struct v3dv_physical_device *physical_device = + &cmd_buffer->device->instance->physicalDevice; + + if (!physical_device->options.merge_jobs) + return false; + + /* Each render pass starts a new job */ + if (state->subpass_idx == 0) + return false; + + /* Two subpasses can be merged in the same job if we can emit a single RCL + * for them (since the RCL includes the END_OF_RENDERING command that + * triggers the "render job finished" interrupt). We can do this so long + * as both subpasses render against the same attachments. + */ + uint32_t prev_subpass_idx = state->subpass_idx - 1; + struct v3dv_subpass *prev_subpass = &state->pass->subpasses[prev_subpass_idx]; + struct v3dv_subpass *subpass = &state->pass->subpasses[state->subpass_idx]; + + /* Because the list of subpass attachments can include VK_ATTACHMENT_UNUSED, + * we need to check that for each subpass all its used attachments are + * used by the other subpass. + */ + bool compatible = + attachment_list_is_subset(prev_subpass->color_attachments, + prev_subpass->color_count, + subpass->color_attachments, + subpass->color_count); + if (!compatible) + return false; + + compatible = + attachment_list_is_subset(subpass->color_attachments, + subpass->color_count, + prev_subpass->color_attachments, + prev_subpass->color_count); + if (!compatible) + return false; + + /* FIXME: resolve attachments */ + + /* FIXME: also check depth/stencil attachment */ + + return true; +} + void v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer) { @@ -174,6 +250,17 @@ v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer) assert(job); assert(v3dv_cl_offset(&job->bcl) != 0); + /* When we merge multiple subpasses into the same job we must only emit one + * RCL, so we do that here, when we decided that we need to finish the job. + * Any rendering that happens outside a render pass is never merged, so + * the RCL should have been emitted by the time we got here. + */ + assert(v3dv_cl_offset(&job->rcl) != 0 || cmd_buffer->state.pass); + if (cmd_buffer->state.pass) { + emit_rcl(cmd_buffer); + emit_binning_flush(job); + } + list_addtail(&job->list_link, &cmd_buffer->submit_jobs); cmd_buffer->state.job = NULL; } @@ -181,6 +268,12 @@ v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer) struct v3dv_job * v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer) { + /* Don't create a new job if we can merge the current subpass into + * the current job. + */ + if (cmd_buffer->state.pass && cmd_buffer_can_merge_subpass(cmd_buffer)) + return cmd_buffer->state.job; + /* Ensure we are not starting a new job without finishing a previous one */ if (cmd_buffer->state.job != NULL) v3dv_cmd_buffer_finish_job(cmd_buffer); @@ -206,6 +299,13 @@ v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer) v3dv_cl_init(job, &job->indirect); v3dv_cl_begin(&job->indirect); + /* Keep track of the first subpass that we are recording in this new job. + * We will use this when we emit the RCL to decide how to emit our loads + * and stores. + */ + if (cmd_buffer->state.pass) + job->first_subpass = cmd_buffer->state.subpass_idx; + cmd_buffer->state.job = job; return job; } @@ -601,7 +701,7 @@ emit_loads(struct v3dv_cmd_buffer *cmd_buffer, bool needs_load = attachment->desc.loadOp == VK_ATTACHMENT_LOAD_OP_LOAD || (attachment->desc.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR && - state->subpass_idx > attachment_state->first_subpass); + state->job->first_subpass > attachment_state->first_subpass); if (needs_load) { struct v3dv_image_view *iview = framebuffer->attachments[attachment_idx]; @@ -677,7 +777,7 @@ emit_stores(struct v3dv_cmd_buffer *cmd_buffer, /* Only clear once on the first subpass that uses the attachment */ bool needs_clear = attachment->desc.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR && - state->attachments[attachment_idx].first_subpass == state->subpass_idx; + state->attachments[attachment_idx].first_subpass == state->job->first_subpass; store_general(cmd_buffer, cl, attachment_idx, layer, RENDER_TARGET_0 + i, needs_clear); has_stores = true; @@ -979,83 +1079,77 @@ subpass_start(struct v3dv_cmd_buffer *cmd_buffer) assert(state->subpass_idx < state->pass->subpass_count); - /* FIXME: for now, each subpass goes into a separate job. In the future we - * might be able to merge subpasses that render to the same render targets - * so long as they don't render to more than 4 color attachments and there - * aren't other subpass dependencies preveting this. - */ struct v3dv_job *job = v3dv_cmd_buffer_start_job(cmd_buffer); - const struct v3dv_framebuffer *framebuffer = cmd_buffer->state.framebuffer; + /* If we are starting a new job we need to setup binning. */ + if (job->first_subpass == state->subpass_idx) { + const struct v3dv_framebuffer *framebuffer = + cmd_buffer->state.framebuffer; - /* Setup binning for this subpass. - * - * FIXME: For now we do this at the start each subpass but if we implement - * subpass merges in the future we would only want to emit this once per job. - */ - v3dv_cl_ensure_space_with_branch(&job->bcl, 256); + v3dv_cl_ensure_space_with_branch(&job->bcl, 256); - /* The PTB will request the tile alloc initial size per tile at start - * of tile binning. - */ - const uint32_t fb_layers = 1; /* FIXME */ - uint32_t tile_alloc_size = 64 * MAX2(fb_layers, 1) * - framebuffer->draw_tiles_x * - framebuffer->draw_tiles_y; + /* The PTB will request the tile alloc initial size per tile at start + * of tile binning. + */ + const uint32_t fb_layers = 1; /* FIXME */ + uint32_t tile_alloc_size = 64 * MAX2(fb_layers, 1) * + framebuffer->draw_tiles_x * + framebuffer->draw_tiles_y; - /* The PTB allocates in aligned 4k chunks after the initial setup. */ - tile_alloc_size = align(tile_alloc_size, 4096); + /* The PTB allocates in aligned 4k chunks after the initial setup. */ + tile_alloc_size = align(tile_alloc_size, 4096); - /* Include the first two chunk allocations that the PTB does so that - * we definitely clear the OOM condition before triggering one (the HW - * won't trigger OOM during the first allocations). - */ - tile_alloc_size += 8192; + /* Include the first two chunk allocations that the PTB does so that + * we definitely clear the OOM condition before triggering one (the HW + * won't trigger OOM during the first allocations). + */ + tile_alloc_size += 8192; - /* For performance, allocate some extra initial memory after the PTB's - * minimal allocations, so that we hopefully don't have to block the - * GPU on the kernel handling an OOM signal. - */ - tile_alloc_size += 512 * 1024; + /* For performance, allocate some extra initial memory after the PTB's + * minimal allocations, so that we hopefully don't have to block the + * GPU on the kernel handling an OOM signal. + */ + tile_alloc_size += 512 * 1024; - job->tile_alloc = v3dv_bo_alloc(cmd_buffer->device, tile_alloc_size); - v3dv_job_add_bo(job, job->tile_alloc); + job->tile_alloc = v3dv_bo_alloc(cmd_buffer->device, tile_alloc_size); + v3dv_job_add_bo(job, job->tile_alloc); - const uint32_t tsda_per_tile_size = 256; - const uint32_t tile_state_size = MAX2(fb_layers, 1) * - framebuffer->draw_tiles_x * - framebuffer->draw_tiles_y * - tsda_per_tile_size; - job->tile_state = v3dv_bo_alloc(cmd_buffer->device, tile_state_size); - v3dv_job_add_bo(job, job->tile_state); + const uint32_t tsda_per_tile_size = 256; + const uint32_t tile_state_size = MAX2(fb_layers, 1) * + framebuffer->draw_tiles_x * + framebuffer->draw_tiles_y * + tsda_per_tile_size; + job->tile_state = v3dv_bo_alloc(cmd_buffer->device, tile_state_size); + v3dv_job_add_bo(job, job->tile_state); - /* This must go before the binning mode configuration. It is - * required for layered framebuffers to work. - */ - if (fb_layers > 0) { - cl_emit(&job->bcl, NUMBER_OF_LAYERS, config) { - config.number_of_layers = fb_layers; + /* This must go before the binning mode configuration. It is + * required for layered framebuffers to work. + */ + if (fb_layers > 0) { + cl_emit(&job->bcl, NUMBER_OF_LAYERS, config) { + config.number_of_layers = fb_layers; + } } - } - cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) { - config.width_in_pixels = framebuffer->width; - config.height_in_pixels = framebuffer->height; - config.number_of_render_targets = MAX2(framebuffer->attachment_count, 1); - config.multisample_mode_4x = false; /* FIXME */ - config.maximum_bpp_of_all_render_targets = framebuffer->internal_bpp; - } + cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) { + config.width_in_pixels = framebuffer->width; + config.height_in_pixels = framebuffer->height; + config.number_of_render_targets = MAX2(framebuffer->attachment_count, 1); + config.multisample_mode_4x = false; /* FIXME */ + config.maximum_bpp_of_all_render_targets = framebuffer->internal_bpp; + } - /* There's definitely nothing in the VCD cache we want. */ - cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin); + /* There's definitely nothing in the VCD cache we want. */ + cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin); - /* Disable any leftover OQ state from another job. */ - cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter); + /* Disable any leftover OQ state from another job. */ + cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter); - /* "Binning mode lists must have a Start Tile Binning item (6) after - * any prefix state data before the binning list proper starts." - */ - cl_emit(&job->bcl, START_TILE_BINNING, bin); + /* "Binning mode lists must have a Start Tile Binning item (6) after + * any prefix state data before the binning list proper starts." + */ + cl_emit(&job->bcl, START_TILE_BINNING, bin); + } /* If we don't have a scissor or viewport defined let's just use the render * area as clip_window, as that would be required for a clear in any @@ -1090,16 +1184,6 @@ subpass_finish(struct v3dv_cmd_buffer *cmd_buffer) { struct v3dv_job *job = cmd_buffer->state.job; assert(job); - - emit_rcl(cmd_buffer); - - /* This finishes the a binning job. - * - * FIXME: if the next subpass draws to the same RTs, we could skip this - * and the binning setup for the next subpass. - */ - emit_binning_flush(job); - v3dv_cmd_buffer_finish_job(cmd_buffer); } void @@ -1111,6 +1195,7 @@ v3dv_CmdEndRenderPass(VkCommandBuffer commandBuffer) struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; assert(state->subpass_idx == state->pass->subpass_count - 1); subpass_finish(cmd_buffer); + v3dv_cmd_buffer_finish_job(cmd_buffer); /* We are no longer inside a render pass */ state->pass = NULL; |