-rw-r--r--  src/broadcom/vulkan/v3dv_cl.c           24
-rw-r--r--  src/broadcom/vulkan/v3dv_cl.h           10
-rw-r--r--  src/broadcom/vulkan/v3dv_cmd_buffer.c  386
-rw-r--r--  src/broadcom/vulkan/v3dv_meta_copy.c    76
-rw-r--r--  src/broadcom/vulkan/v3dv_private.h      46
-rw-r--r--  src/broadcom/vulkan/v3dv_queue.c        70
-rw-r--r--  src/broadcom/vulkan/v3dv_uniforms.c     13
7 files changed, 369 insertions(+), 256 deletions(-)
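
What this change does, in brief: ownership of the binning (BCL), render (RCL)
and indirect command lists moves from the command buffer into a new per-job
object, so a single command buffer can record several kernel submissions (for
now, one per subpass plus one per meta copy). A minimal sketch of the
resulting recording flow, using only the entry points the patch introduces
(what actually gets recorded into the job is elided):

    /* One job per subpass or meta operation. */
    struct v3dv_job *job = v3dv_cmd_buffer_start_job(cmd_buffer);
    /* ... emit binning setup, state and draws into job->bcl/rcl/indirect ... */
    v3dv_cmd_buffer_finish_job(cmd_buffer);  /* appends job to submit_jobs */

At vkQueueSubmit() time the queue then walks cmd_buffer->submit_jobs and
issues one DRM_IOCTL_V3D_SUBMIT_CL ioctl per job.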
diff --git a/src/broadcom/vulkan/v3dv_cl.c b/src/broadcom/vulkan/v3dv_cl.c
index d3494c53f64..e20e6733356 100644
--- a/src/broadcom/vulkan/v3dv_cl.c
+++ b/src/broadcom/vulkan/v3dv_cl.c
@@ -25,20 +25,18 @@
#include "broadcom/cle/v3dx_pack.h"
void
-v3dv_cl_init(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_cl *cl)
+v3dv_cl_init(struct v3dv_job *job, struct v3dv_cl *cl)
{
cl->base = NULL;
cl->next = cl->base;
cl->bo = NULL;
cl->size = 0;
- cl->cmd_buffer = cmd_buffer;
+ cl->job = job;
}
void
v3dv_cl_begin(struct v3dv_cl *cl)
{
- assert(!cl->cmd_buffer ||
- cl->cmd_buffer->status == V3DV_CMD_BUFFER_STATUS_INITIALIZED);
assert(v3dv_cl_offset(cl) == 0);
}
@@ -48,15 +46,15 @@ v3dv_cl_reset(struct v3dv_cl *cl)
/* FIXME: consider keeping the BO when the command buffer is reset with
* flag VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT.
*/
- v3dv_cl_init(cl->cmd_buffer, cl);
+ v3dv_cl_init(cl->job, cl);
}
void
v3dv_cl_destroy(struct v3dv_cl *cl)
{
if (cl->bo) {
- assert(cl->cmd_buffer);
- v3dv_bo_free(cl->cmd_buffer->device, cl->bo);
+ assert(cl->job);
+ v3dv_bo_free(cl->job->cmd_buffer->device, cl->bo);
}
/* Leave the CL in a reset state to catch use-after-destroy instances */
@@ -73,15 +71,15 @@ v3dv_cl_ensure_space(struct v3dv_cl *cl, uint32_t space, uint32_t alignment)
return offset;
}
- struct v3dv_bo *bo = v3dv_bo_alloc(cl->cmd_buffer->device, space);
+ struct v3dv_bo *bo = v3dv_bo_alloc(cl->job->cmd_buffer->device, space);
if (!bo) {
fprintf(stderr, "failed to allocate memory for command list");
abort();
}
- v3dv_cmd_buffer_add_bo(cl->cmd_buffer, bo);
+ v3dv_job_add_bo(cl->job, bo);
- bool ok = v3dv_bo_map(cl->cmd_buffer->device, bo, bo->size);
+ bool ok = v3dv_bo_map(cl->job->cmd_buffer->device, bo, bo->size);
if (!ok) {
fprintf(stderr, "failed to map command list buffer");
abort();
@@ -102,7 +100,7 @@ v3dv_cl_ensure_space_with_branch(struct v3dv_cl *cl, uint32_t space)
if (v3dv_cl_offset(cl) + space + cl_packet_length(BRANCH) <= cl->size)
return;
- struct v3dv_bo *bo = v3dv_bo_alloc(cl->cmd_buffer->device, space);
+ struct v3dv_bo *bo = v3dv_bo_alloc(cl->job->cmd_buffer->device, space);
if (!bo) {
fprintf(stderr, "failed to allocate memory for command list");
abort();
@@ -115,9 +113,9 @@ v3dv_cl_ensure_space_with_branch(struct v3dv_cl *cl, uint32_t space)
}
}
- v3dv_cmd_buffer_add_bo(cl->cmd_buffer, bo);
+ v3dv_job_add_bo(cl->job, bo);
- bool ok = v3dv_bo_map(cl->cmd_buffer->device, bo, bo->size);
+ bool ok = v3dv_bo_map(cl->job->cmd_buffer->device, bo, bo->size);
if (!ok) {
fprintf(stderr, "failed to map command list buffer");
abort();
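
Note that with a v3dv_cl no longer pointing at its command buffer, every
device access above goes through the chain cl->job->cmd_buffer->device. A
hypothetical accessor (not part of the patch) could keep those call sites
shorter if the chain grows more users:

    static inline struct v3dv_device *
    cl_device(struct v3dv_cl *cl)
    {
       assert(cl->job && cl->job->cmd_buffer);
       return cl->job->cmd_buffer->device;
    }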
diff --git a/src/broadcom/vulkan/v3dv_cl.h b/src/broadcom/vulkan/v3dv_cl.h
index f58b2d5cf99..c95110f48e0 100644
--- a/src/broadcom/vulkan/v3dv_cl.h
+++ b/src/broadcom/vulkan/v3dv_cl.h
@@ -27,10 +27,10 @@
#include "broadcom/cle/v3d_packet_helpers.h"
struct v3dv_bo;
-struct v3dv_cmd_buffer;
+struct v3dv_job;
struct v3dv_cl;
-void v3dv_cmd_buffer_add_bo(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_bo *bo);
+void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);
/**
* Undefined structure, used for typechecking that you're passing the pointers
@@ -46,7 +46,7 @@ struct v3dv_cl_reloc {
struct v3dv_cl {
void *base;
- struct v3dv_cmd_buffer *cmd_buffer;
+ struct v3dv_job *job;
struct v3dv_cl_out *next;
struct v3dv_bo *bo;
uint32_t size;
@@ -82,7 +82,7 @@ v3dv_cl_get_address(struct v3dv_cl *cl)
return (struct v3dv_cl_reloc){ .bo = cl->bo, .offset = v3dv_cl_offset(cl) };
}
-void v3dv_cl_init(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_cl *cl);
+void v3dv_cl_init(struct v3dv_job *job, struct v3dv_cl *cl);
void v3dv_cl_begin(struct v3dv_cl *cl);
void v3dv_cl_reset(struct v3dv_cl *cl);
void v3dv_cl_destroy(struct v3dv_cl *cl);
@@ -167,7 +167,7 @@ static inline void
cl_pack_emit_reloc(struct v3dv_cl *cl, const struct v3dv_cl_reloc *reloc)
{
if (reloc->bo)
- v3dv_cmd_buffer_add_bo(cl->cmd_buffer, reloc->bo);
+ v3dv_job_add_bo(cl->job, reloc->bo);
}
#endif /* V3DV_CL_H */
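
cl_pack_emit_reloc() is what keeps a job's BO set complete: assuming the
generated pack helpers route every address field through it (as they do in
the v3d Gallium driver this code derives from), emitting any packet that
embeds a BO address also registers that BO with the owning job. Illustrative
use:

    /* Packing the address field goes through cl_pack_emit_reloc(), adding
     * job->indirect.bo to job->bos so submit passes its handle to the kernel. */
    cl_emit(&job->bcl, GL_SHADER_STATE, state) {
       state.address = v3dv_cl_address(job->indirect.bo, shader_rec_offset);
    }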
diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c
index ca302db0301..caf2d2ad5be 100644
--- a/src/broadcom/vulkan/v3dv_cmd_buffer.c
+++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c
@@ -35,16 +35,16 @@ const struct v3dv_dynamic_state default_dynamic_state = {
};
void
-v3dv_cmd_buffer_add_bo(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_bo *bo)
+v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo)
{
if (!bo)
return;
- if (_mesa_set_search(cmd_buffer->bos, bo))
+ if (_mesa_set_search(job->bos, bo))
return;
- _mesa_set_add(cmd_buffer->bos, bo);
- cmd_buffer->bo_count++;
+ _mesa_set_add(job->bos, bo);
+ job->bo_count++;
}
VkResult
@@ -94,13 +94,7 @@ cmd_buffer_create(struct v3dv_device *device,
cmd_buffer->level = level;
cmd_buffer->usage_flags = 0;
- cmd_buffer->bos =
- _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
- cmd_buffer->bo_count = 0;
-
- v3dv_cl_init(cmd_buffer, &cmd_buffer->bcl);
- v3dv_cl_init(cmd_buffer, &cmd_buffer->rcl);
- v3dv_cl_init(cmd_buffer, &cmd_buffer->indirect);
+ list_inithead(&cmd_buffer->submit_jobs);
cmd_buffer->status = V3DV_CMD_BUFFER_STATUS_NEW;
@@ -113,48 +107,114 @@ cmd_buffer_create(struct v3dv_device *device,
}
static void
-cmd_buffer_destroy(struct v3dv_cmd_buffer *cmd_buffer)
+job_destroy(struct v3dv_job *job)
{
- list_del(&cmd_buffer->pool_link);
+ assert(job);
+
+ list_del(&job->list_link);
- v3dv_cl_destroy(&cmd_buffer->bcl);
- v3dv_cl_destroy(&cmd_buffer->rcl);
- v3dv_cl_destroy(&cmd_buffer->indirect);
+ v3dv_cl_destroy(&job->bcl);
+ v3dv_cl_destroy(&job->rcl);
+ v3dv_cl_destroy(&job->indirect);
/* Since we don't ref BOs when we add them to the command buffer, don't
* unref them here either.
*/
#if 0
- set_foreach(cmd_buffer->bos, entry) {
+ set_foreach(job->bos, entry) {
struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
-      v3dv_bo_free(cmd_buffer->device, bo);
+      v3dv_bo_free(job->cmd_buffer->device, bo);
}
#endif
- _mesa_set_destroy(cmd_buffer->bos, NULL);
+ _mesa_set_destroy(job->bos, NULL);
+
+ v3dv_bo_free(job->cmd_buffer->device, job->tile_alloc);
+ v3dv_bo_free(job->cmd_buffer->device, job->tile_state);
+}
+
+static void
+cmd_buffer_destroy(struct v3dv_cmd_buffer *cmd_buffer)
+{
+ list_del(&cmd_buffer->pool_link);
- v3dv_bo_free(cmd_buffer->device, cmd_buffer->tile_alloc);
- v3dv_bo_free(cmd_buffer->device, cmd_buffer->tile_state);
+ list_for_each_entry_safe(struct v3dv_job, job,
+ &cmd_buffer->submit_jobs, list_link) {
+ job_destroy(job);
+ }
+
+ if (cmd_buffer->state.job)
+ job_destroy(cmd_buffer->state.job);
vk_free(&cmd_buffer->pool->alloc, cmd_buffer);
}
+static void
+emit_binning_flush(struct v3dv_job *job)
+{
+ assert(job);
+ v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(FLUSH));
+ cl_emit(&job->bcl, FLUSH, flush);
+}
+
+void
+v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer)
+{
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+ assert(v3dv_cl_offset(&job->bcl) != 0);
+
+ list_addtail(&job->list_link, &cmd_buffer->submit_jobs);
+ cmd_buffer->state.job = NULL;
+}
+
+struct v3dv_job *
+v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer)
+{
+   /* If there is a job in flight, finish it before starting a new one */
+ if (cmd_buffer->state.job != NULL) {
+ emit_binning_flush(cmd_buffer->state.job);
+ v3dv_cmd_buffer_finish_job(cmd_buffer);
+ }
+
+ assert(cmd_buffer->state.job == NULL);
+ struct v3dv_job *job = vk_zalloc(&cmd_buffer->device->alloc,
+ sizeof(struct v3dv_job), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ assert(job);
+
+ job->cmd_buffer = cmd_buffer;
+
+ job->bos =
+ _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+ job->bo_count = 0;
+
+ v3dv_cl_init(job, &job->bcl);
+ v3dv_cl_begin(&job->bcl);
+
+ v3dv_cl_init(job, &job->rcl);
+ v3dv_cl_begin(&job->rcl);
+
+ v3dv_cl_init(job, &job->indirect);
+ v3dv_cl_begin(&job->indirect);
+
+ cmd_buffer->state.job = job;
+ return job;
+}
+
static VkResult
cmd_buffer_reset(struct v3dv_cmd_buffer *cmd_buffer)
{
if (cmd_buffer->status != V3DV_CMD_BUFFER_STATUS_INITIALIZED) {
- cmd_buffer->usage_flags = 0;
+ /* FIXME */
+ assert(cmd_buffer->status == V3DV_CMD_BUFFER_STATUS_NEW);
- _mesa_set_clear(cmd_buffer->bos, NULL);
- cmd_buffer->bo_count = 0;
-
- v3dv_cl_reset(&cmd_buffer->bcl);
- v3dv_cl_reset(&cmd_buffer->rcl);
- v3dv_cl_reset(&cmd_buffer->indirect);
+ cmd_buffer->usage_flags = 0;
struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
state->pass = NULL;
state->framebuffer = NULL;
state->subpass_idx = 0;
+ state->job = NULL;
cmd_buffer->status = V3DV_CMD_BUFFER_STATUS_INITIALIZED;
}
@@ -248,19 +308,16 @@ v3dv_BeginCommandBuffer(VkCommandBuffer commandBuffer,
cmd_buffer->usage_flags = pBeginInfo->flags;
- v3dv_cl_begin(&cmd_buffer->bcl);
- v3dv_cl_begin(&cmd_buffer->rcl);
- v3dv_cl_begin(&cmd_buffer->indirect);
-
cmd_buffer->status = V3DV_CMD_BUFFER_STATUS_RECORDING;
return VK_SUCCESS;
}
static void
-emit_clip_window(struct v3dv_cmd_buffer *cmd_buffer, VkRect2D *rect)
+emit_clip_window(struct v3dv_job *job, const VkRect2D *rect)
{
- cl_emit(&cmd_buffer->bcl, CLIP_WINDOW, clip) {
+ assert(job);
+ cl_emit(&job->bcl, CLIP_WINDOW, clip) {
clip.clip_window_left_pixel_coordinate = rect->offset.x;
clip.clip_window_bottom_pixel_coordinate = rect->offset.y;
clip.clip_window_width_in_pixels = rect->extent.width;
@@ -349,90 +406,12 @@ v3dv_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
pRenderPassBegin->clearValueCount,
pRenderPassBegin->pClearValues);
- v3dv_cl_ensure_space_with_branch(&cmd_buffer->bcl, 256);
-
- /* The PTB will request the tile alloc initial size per tile at start
- * of tile binning.
- */
- const uint32_t fb_layers = 1; /* FIXME */
- uint32_t tile_alloc_size = 64 * MAX2(fb_layers, 1) *
- framebuffer->draw_tiles_x *
- framebuffer->draw_tiles_y;
-
- /* The PTB allocates in aligned 4k chunks after the initial setup. */
- tile_alloc_size = align(tile_alloc_size, 4096);
-
- /* Include the first two chunk allocations that the PTB does so that
- * we definitely clear the OOM condition before triggering one (the HW
- * won't trigger OOM during the first allocations).
- */
- tile_alloc_size += 8192;
-
- /* For performance, allocate some extra initial memory after the PTB's
- * minimal allocations, so that we hopefully don't have to block the
- * GPU on the kernel handling an OOM signal.
- */
- tile_alloc_size += 512 * 1024;
-
- cmd_buffer->tile_alloc = v3dv_bo_alloc(cmd_buffer->device, tile_alloc_size);
- v3dv_cmd_buffer_add_bo(cmd_buffer, cmd_buffer->tile_alloc);
-
- const uint32_t tsda_per_tile_size = 256;
- const uint32_t tile_state_size = MAX2(fb_layers, 1) *
- framebuffer->draw_tiles_x *
- framebuffer->draw_tiles_y *
- tsda_per_tile_size;
- cmd_buffer->tile_state = v3dv_bo_alloc(cmd_buffer->device, tile_state_size);
- v3dv_cmd_buffer_add_bo(cmd_buffer, cmd_buffer->tile_state);
-
- /* This must go before the binning mode configuration. It is
- * required for layered framebuffers to work.
- */
- if (fb_layers > 0) {
- cl_emit(&cmd_buffer->bcl, NUMBER_OF_LAYERS, config) {
- config.number_of_layers = fb_layers;
- }
- }
-
- cl_emit(&cmd_buffer->bcl, TILE_BINNING_MODE_CFG, config) {
- config.width_in_pixels = framebuffer->width;
- config.height_in_pixels = framebuffer->height;
- config.number_of_render_targets = MAX2(framebuffer->attachment_count, 1);
- config.multisample_mode_4x = false; /* FIXME */
- config.maximum_bpp_of_all_render_targets = framebuffer->internal_bpp;
- }
-
- /* There's definitely nothing in the VCD cache we want. */
- cl_emit(&cmd_buffer->bcl, FLUSH_VCD_CACHE, bin);
-
- /* Disable any leftover OQ state from another job. */
- cl_emit(&cmd_buffer->bcl, OCCLUSION_QUERY_COUNTER, counter);
-
- /* "Binning mode lists must have a Start Tile Binning item (6) after
- * any prefix state data before the binning list proper starts."
- */
- cl_emit(&cmd_buffer->bcl, START_TILE_BINNING, bin);
-
/* FIXME: probably need to align the render area to tile boundaries since
* the tile clears will render full tiles anyway.
* See vkGetRenderAreaGranularity().
*/
state->render_area = pRenderPassBegin->renderArea;
- /* If we don't have a scissor or viewport defined let's just use the render
- * area as clip_window, as that would be required for a clear in any
- * case. If we have that, it would be emitted as part of the pipeline
- * dynamic state flush
- *
- * FIXME: this is mostly just needed for clear. radv has dedicated paths
- * for them, so we could get that idea. In any case, need to revisit if
- * this is the place to emit the clip window.
- */
- if (cmd_buffer->state.dynamic.scissor.count == 0 &&
- cmd_buffer->state.dynamic.viewport.count == 0) {
- emit_clip_window(cmd_buffer, &state->render_area);
- }
-
/* Setup for first subpass */
state->subpass_idx = 0;
}
@@ -627,10 +606,13 @@ emit_stores(struct v3dv_cmd_buffer *cmd_buffer,
static void
emit_generic_per_tile_list(struct v3dv_cmd_buffer *cmd_buffer, uint32_t layer)
{
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+
/* Emit the generic list in our indirect state -- the rcl will just
* have pointers into it.
*/
- struct v3dv_cl *cl = &cmd_buffer->indirect;
+ struct v3dv_cl *cl = &job->indirect;
v3dv_cl_ensure_space(cl, 200, 1);
struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
@@ -653,7 +635,7 @@ emit_generic_per_tile_list(struct v3dv_cmd_buffer *cmd_buffer, uint32_t layer)
cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
- cl_emit(&cmd_buffer->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
+ cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
branch.start = tile_list_start;
branch.end = v3dv_cl_get_address(cl);
}
@@ -665,7 +647,8 @@ emit_render_layer(struct v3dv_cmd_buffer *cmd_buffer, uint32_t layer)
const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
const struct v3dv_framebuffer *framebuffer = state->framebuffer;
- struct v3dv_cl *rcl = &cmd_buffer->rcl;
+ struct v3dv_job *job = cmd_buffer->state.job;
+ struct v3dv_cl *rcl = &job->rcl;
/* If doing multicore binning, we would need to initialize each
* core's tile list here.
@@ -673,7 +656,7 @@ emit_render_layer(struct v3dv_cmd_buffer *cmd_buffer, uint32_t layer)
const uint32_t tile_alloc_offset =
64 * layer * framebuffer->draw_tiles_x * framebuffer->draw_tiles_y;
cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
- list.address = v3dv_cl_address(cmd_buffer->tile_alloc, tile_alloc_offset);
+ list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
}
cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
@@ -758,10 +741,13 @@ emit_render_layer(struct v3dv_cmd_buffer *cmd_buffer, uint32_t layer)
static void
emit_rcl(struct v3dv_cmd_buffer *cmd_buffer)
{
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+
/* FIXME */
const uint32_t fb_layers = 1;
- v3dv_cl_ensure_space_with_branch(&cmd_buffer->rcl, 200 +
+ v3dv_cl_ensure_space_with_branch(&job->rcl, 200 +
MAX2(fb_layers, 1) * 256 *
cl_packet_length(SUPERTILE_COORDINATES));
@@ -772,7 +758,7 @@ emit_rcl(struct v3dv_cmd_buffer *cmd_buffer)
const struct v3dv_subpass *subpass =
&state->pass->subpasses[state->subpass_idx];
- struct v3dv_cl *rcl = &cmd_buffer->rcl;
+ struct v3dv_cl *rcl = &job->rcl;
/* Common config must be the first TILE_RENDERING_MODE_CFG and
* Z_STENCIL_CLEAR_VALUES must be last. The ones in between are optional
@@ -892,7 +878,7 @@ subpass_start(struct v3dv_cmd_buffer *cmd_buffer)
for (uint32_t i = 0; i < subpass->color_count; i++) {
uint32_t rp_attachment_idx = subpass->color_attachments[i].attachment;
const struct v3dv_render_pass_attachment *attachment =
- &cmd_buffer->state.pass->attachments[rp_attachment_idx];
+ &state->pass->attachments[rp_attachment_idx];
/* FIXME: if a previous subpass has already computed the hw clear color
* for this attachment we could skip this. We can just flag this
@@ -904,7 +890,7 @@ subpass_start(struct v3dv_cmd_buffer *cmd_buffer)
const uint32_t sp_attachment_idx = i;
const struct v3dv_image_view *iview =
- cmd_buffer->state.framebuffer->attachments[sp_attachment_idx];
+ state->framebuffer->attachments[sp_attachment_idx];
assert((iview->aspects &
(VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) == 0);
@@ -917,20 +903,113 @@ subpass_start(struct v3dv_cmd_buffer *cmd_buffer)
clear_color);
}
}
+
+ /* FIXME: for now, each subpass goes into a separate job. In the future we
+ * might be able to merge subpasses that render to the same render targets
+ * so long as they don't render to more than 4 color attachments and there
+    * aren't other subpass dependencies preventing this.
+ */
+ struct v3dv_job *job = v3dv_cmd_buffer_start_job(cmd_buffer);
+
+ const struct v3dv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
+
+ /* Setup binning for this subpass.
+ *
+    * FIXME: For now we do this at the start of each subpass but if we implement
+ * subpass merges in the future we would only want to emit this once per job.
+ */
+ v3dv_cl_ensure_space_with_branch(&job->bcl, 256);
+
+ /* The PTB will request the tile alloc initial size per tile at start
+ * of tile binning.
+ */
+ const uint32_t fb_layers = 1; /* FIXME */
+ uint32_t tile_alloc_size = 64 * MAX2(fb_layers, 1) *
+ framebuffer->draw_tiles_x *
+ framebuffer->draw_tiles_y;
+
+ /* The PTB allocates in aligned 4k chunks after the initial setup. */
+ tile_alloc_size = align(tile_alloc_size, 4096);
+
+ /* Include the first two chunk allocations that the PTB does so that
+ * we definitely clear the OOM condition before triggering one (the HW
+ * won't trigger OOM during the first allocations).
+ */
+ tile_alloc_size += 8192;
+
+ /* For performance, allocate some extra initial memory after the PTB's
+ * minimal allocations, so that we hopefully don't have to block the
+ * GPU on the kernel handling an OOM signal.
+ */
+ tile_alloc_size += 512 * 1024;
+
+ job->tile_alloc = v3dv_bo_alloc(cmd_buffer->device, tile_alloc_size);
+ v3dv_job_add_bo(job, job->tile_alloc);
+
+ const uint32_t tsda_per_tile_size = 256;
+ const uint32_t tile_state_size = MAX2(fb_layers, 1) *
+ framebuffer->draw_tiles_x *
+ framebuffer->draw_tiles_y *
+ tsda_per_tile_size;
+ job->tile_state = v3dv_bo_alloc(cmd_buffer->device, tile_state_size);
+ v3dv_job_add_bo(job, job->tile_state);
+
+ /* This must go before the binning mode configuration. It is
+ * required for layered framebuffers to work.
+ */
+ if (fb_layers > 0) {
+ cl_emit(&job->bcl, NUMBER_OF_LAYERS, config) {
+ config.number_of_layers = fb_layers;
+ }
+ }
+
+ cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) {
+ config.width_in_pixels = framebuffer->width;
+ config.height_in_pixels = framebuffer->height;
+ config.number_of_render_targets = MAX2(framebuffer->attachment_count, 1);
+ config.multisample_mode_4x = false; /* FIXME */
+ config.maximum_bpp_of_all_render_targets = framebuffer->internal_bpp;
+ }
+
+ /* There's definitely nothing in the VCD cache we want. */
+ cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin);
+
+ /* Disable any leftover OQ state from another job. */
+ cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter);
+
+ /* "Binning mode lists must have a Start Tile Binning item (6) after
+ * any prefix state data before the binning list proper starts."
+ */
+ cl_emit(&job->bcl, START_TILE_BINNING, bin);
+
+ /* If we don't have a scissor or viewport defined let's just use the render
+ * area as clip_window, as that would be required for a clear in any
+ * case. If we have that, it would be emitted as part of the pipeline
+ * dynamic state flush
+ *
+ * FIXME: this is mostly just needed for clear. radv has dedicated paths
+    * for them, so we could adopt that approach. In any case, need to revisit if
+ * this is the place to emit the clip window.
+ */
+ if (cmd_buffer->state.dynamic.scissor.count == 0 &&
+ cmd_buffer->state.dynamic.viewport.count == 0) {
+ emit_clip_window(job, &state->render_area);
+ }
}
static void
subpass_finish(struct v3dv_cmd_buffer *cmd_buffer)
{
- v3dv_cl_ensure_space_with_branch(&cmd_buffer->bcl, cl_packet_length(FLUSH));
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
- /* We need to emit a flush between binning jobs, so do this before we start
- * recording the next subpass.
+   /* This finishes the current binning job.
*
* FIXME: if the next subpass draws to the same RTs, we could skip this
* and the binning setup for the next subpass.
*/
- cl_emit(&cmd_buffer->bcl, FLUSH, flush);
+ emit_binning_flush(job);
+ v3dv_cmd_buffer_finish_job(cmd_buffer);
}
static void
@@ -961,11 +1040,18 @@ v3dv_EndCommandBuffer(VkCommandBuffer commandBuffer)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
- if (v3dv_cl_offset(&cmd_buffer->bcl) == 0)
- return VK_SUCCESS; /* FIXME? */
-
cmd_buffer->status = V3DV_CMD_BUFFER_STATUS_EXECUTABLE;
+ struct v3dv_job *job = cmd_buffer->state.job;
+ if (!job)
+ return VK_SUCCESS;
+
+   /* We get here if we recorded commands after the last render pass in the
+    * command buffer. Make sure we finish this last job.
+    */
+ assert(v3dv_cl_offset(&job->bcl) != 0);
+ emit_binning_flush(job);
+ v3dv_cmd_buffer_finish_job(cmd_buffer);
+
return VK_SUCCESS;
}
@@ -1028,11 +1114,11 @@ v3dv_CmdBindPipeline(VkCommandBuffer commandBuffer,
/* FIXME: is this the best place to do this, or should it happen at draw time? */
if (pipeline->vs->assembly_bo)
- v3dv_cmd_buffer_add_bo(cmd_buffer, pipeline->vs->assembly_bo);
+ v3dv_job_add_bo(cmd_buffer->state.job, pipeline->vs->assembly_bo);
if (pipeline->vs_bin->assembly_bo)
- v3dv_cmd_buffer_add_bo(cmd_buffer, pipeline->vs_bin->assembly_bo);
+ v3dv_job_add_bo(cmd_buffer->state.job, pipeline->vs_bin->assembly_bo);
if (pipeline->fs->assembly_bo)
- v3dv_cmd_buffer_add_bo(cmd_buffer, pipeline->fs->assembly_bo);
+ v3dv_job_add_bo(cmd_buffer->state.job, pipeline->fs->assembly_bo);
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_PIPELINE;
break;
@@ -1181,7 +1267,7 @@ emit_scissor(struct v3dv_cmd_buffer *cmd_buffer)
clip_window.extent.width = maxx - minx;
clip_window.extent.height = maxy - miny;
- emit_clip_window(cmd_buffer, &clip_window);
+ emit_clip_window(cmd_buffer->state.job, &clip_window);
}
static void
@@ -1194,23 +1280,26 @@ emit_viewport(struct v3dv_cmd_buffer *cmd_buffer)
float *vptranslate = dynamic->viewport.translate[0];
float *vpscale = dynamic->viewport.scale[0];
- cl_emit(&cmd_buffer->bcl, CLIPPER_XY_SCALING, clip) {
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+
+ cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
clip.viewport_half_width_in_1_256th_of_pixel = vpscale[0] * 256.0f;
clip.viewport_half_height_in_1_256th_of_pixel = vpscale[1] * 256.0f;
}
- cl_emit(&cmd_buffer->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
+ cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
clip.viewport_z_offset_zc_to_zs = vptranslate[2];
clip.viewport_z_scale_zc_to_zs = vpscale[2];
}
- cl_emit(&cmd_buffer->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
+ cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
float z1 = (vptranslate[2] - vpscale[2]);
float z2 = (vptranslate[2] + vpscale[2]);
clip.minimum_zw = MIN2(z1, z2);
clip.maximum_zw = MAX2(z1, z2);
}
- cl_emit(&cmd_buffer->bcl, VIEWPORT_OFFSET, vp) {
+ cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
vp.viewport_centre_x_coordinate = vptranslate[0];
vp.viewport_centre_y_coordinate = vptranslate[1];
}
@@ -1233,9 +1322,11 @@ struct vpm_config {
static void
cmd_buffer_emit_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer)
{
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+
struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
struct v3dv_pipeline *pipeline = state->pipeline;
-
assert(pipeline);
/* Upload the uniforms to the indirect CL first */
@@ -1249,9 +1340,9 @@ cmd_buffer_emit_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer)
v3dv_write_uniforms(cmd_buffer, pipeline->vs_bin);
/* Update the cache dirty flag based on the shader progs data */
- state->tmu_dirty_rcl |= pipeline->vs_bin->prog_data.vs->base.tmu_dirty_rcl;
- state->tmu_dirty_rcl |= pipeline->vs->prog_data.vs->base.tmu_dirty_rcl;
- state->tmu_dirty_rcl |= pipeline->fs->prog_data.fs->base.tmu_dirty_rcl;
+ job->tmu_dirty_rcl |= pipeline->vs_bin->prog_data.vs->base.tmu_dirty_rcl;
+ job->tmu_dirty_rcl |= pipeline->vs->prog_data.vs->base.tmu_dirty_rcl;
+ job->tmu_dirty_rcl |= pipeline->fs->prog_data.fs->base.tmu_dirty_rcl;
/* FIXME: fake vtx->num_elements, that is the vertex state that includes
* data from the buffers used on the vertex. Such info is still not
@@ -1267,7 +1358,7 @@ cmd_buffer_emit_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer)
uint32_t num_elements_to_emit = MAX2(vtx_num_elements, 1);
uint32_t shader_rec_offset =
- v3dv_cl_ensure_space(&cmd_buffer->indirect,
+ v3dv_cl_ensure_space(&job->indirect,
cl_packet_length(GL_SHADER_STATE_RECORD) +
num_elements_to_emit *
cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD),
@@ -1286,7 +1377,7 @@ cmd_buffer_emit_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer)
vpm_cfg.Ve = 0;
vpm_cfg.Vc = pipeline->vs->prog_data.vs->vcm_cache_size;
- cl_emit(&cmd_buffer->indirect, GL_SHADER_STATE_RECORD, shader) {
+ cl_emit(&job->indirect, GL_SHADER_STATE_RECORD, shader) {
shader.enable_clipping = true;
shader.point_size_in_shaded_vertex_data =
@@ -1400,9 +1491,9 @@ cmd_buffer_emit_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer)
* by CS and VS. If we have no attributes being consumed by
* the shader, set up a dummy to be loaded into the VPM.
*/
- cl_emit(&cmd_buffer->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
+ cl_emit(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
/* Valid address of data whose value will be unused. */
- attr.address = v3dv_cl_address(cmd_buffer->indirect.bo, 0);
+ attr.address = v3dv_cl_address(job->indirect.bo, 0);
attr.type = ATTRIBUTE_FLOAT;
attr.stride = 0;
@@ -1413,13 +1504,13 @@ cmd_buffer_emit_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer)
}
}
- cl_emit(&cmd_buffer->bcl, VCM_CACHE_SIZE, vcm) {
+ cl_emit(&job->bcl, VCM_CACHE_SIZE, vcm) {
vcm.number_of_16_vertex_batches_for_binning = vpm_cfg_bin.Vc;
vcm.number_of_16_vertex_batches_for_rendering = vpm_cfg.Vc;
}
- cl_emit(&cmd_buffer->bcl, GL_SHADER_STATE, state) {
- state.address = v3dv_cl_address(cmd_buffer->indirect.bo,
+ cl_emit(&job->bcl, GL_SHADER_STATE, state) {
+ state.address = v3dv_cl_address(job->indirect.bo,
shader_rec_offset);
state.number_of_attribute_arrays = num_elements_to_emit;
}
@@ -1462,6 +1553,9 @@ static void
cmd_buffer_emit_draw_packets(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_draw_info *info)
{
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+
struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
struct v3dv_pipeline *pipeline = state->pipeline;
@@ -1473,7 +1567,7 @@ cmd_buffer_emit_draw_packets(struct v3dv_cmd_buffer *cmd_buffer,
/* FIXME: using VERTEX_ARRAY_PRIMS always as it fits our test caselist
* right now. Needs to be chosen based on the current case.
*/
- cl_emit(&cmd_buffer->bcl, VERTEX_ARRAY_PRIMS, prim) {
+ cl_emit(&job->bcl, VERTEX_ARRAY_PRIMS, prim) {
prim.mode = hw_prim_type | prim_tf_enable;
prim.length = info->vertex_count;
prim.index_of_first_vertex = info->first_vertex;
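
The PTB sizing arithmetic that vkCmdBeginRenderPass() used to do now runs per
job in subpass_start(), and the meta copy path in the next file duplicates it
almost verbatim. A hypothetical shared helper (not in the patch), with the
sizing rationale taken from the patch's own comments:

    static uint32_t
    compute_tile_alloc_size(const struct v3dv_framebuffer *fb, uint32_t layers)
    {
       /* The PTB requests 64 bytes per tile per layer at binning start. */
       uint32_t size = 64 * MAX2(layers, 1) * fb->draw_tiles_x * fb->draw_tiles_y;
       /* The PTB allocates in aligned 4k chunks after the initial setup. */
       size = align(size, 4096);
       /* Cover the two chunk allocations the PTB does before signaling OOM. */
       size += 8192;
       /* Extra slack so the GPU hopefully never blocks on kernel OOM handling. */
       size += 512 * 1024;
       return size;
    }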
diff --git a/src/broadcom/vulkan/v3dv_meta_copy.c b/src/broadcom/vulkan/v3dv_meta_copy.c
index a7728a22090..fc55be831c0 100644
--- a/src/broadcom/vulkan/v3dv_meta_copy.c
+++ b/src/broadcom/vulkan/v3dv_meta_copy.c
@@ -27,8 +27,7 @@
#include "vk_format_info.h"
static void
-emit_image_loads(struct v3dv_cmd_buffer *cmd_buffer,
- struct v3dv_cl *cl,
+emit_image_loads(struct v3dv_cl *cl,
struct v3dv_image *image,
uint32_t layer,
uint32_t mip_level)
@@ -67,8 +66,7 @@ emit_image_loads(struct v3dv_cmd_buffer *cmd_buffer,
}
static void
-emit_buffer_stores(struct v3dv_cmd_buffer *cmd_buffer,
- struct v3dv_cl *cl,
+emit_buffer_stores(struct v3dv_cl *cl,
struct v3dv_buffer *buffer,
struct v3dv_image *image,
uint32_t buffer_offset,
@@ -92,13 +90,13 @@ emit_buffer_stores(struct v3dv_cmd_buffer *cmd_buffer,
}
static void
-emit_copy_layer_to_buffer_per_tile_list(struct v3dv_cmd_buffer *cmd_buffer,
+emit_copy_layer_to_buffer_per_tile_list(struct v3dv_job *job,
struct v3dv_buffer *buffer,
struct v3dv_image *image,
uint32_t layer,
const VkBufferImageCopy *region)
{
- struct v3dv_cl *cl = &cmd_buffer->indirect;
+ struct v3dv_cl *cl = &job->indirect;
v3dv_cl_ensure_space(cl, 200, 1);
struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
@@ -108,8 +106,7 @@ emit_copy_layer_to_buffer_per_tile_list(struct v3dv_cmd_buffer *cmd_buffer,
assert(layer < imgrsc->layerCount);
/* Load image to TLB */
- emit_image_loads(cmd_buffer, cl, image,
- imgrsc->baseArrayLayer + layer, imgrsc->mipLevel);
+ emit_image_loads(cl, image, imgrsc->baseArrayLayer + layer, imgrsc->mipLevel);
cl_emit(cl, PRIM_LIST_FORMAT, fmt) {
fmt.primitive_type = LIST_TRIANGLES;
@@ -130,21 +127,20 @@ emit_copy_layer_to_buffer_per_tile_list(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t buffer_stride = width * image->cpp;
uint32_t buffer_offset =
region->bufferOffset + height * buffer_stride * layer;
- emit_buffer_stores(cmd_buffer, cl, buffer, image,
- buffer_offset, buffer_stride);
+ emit_buffer_stores(cl, buffer, image, buffer_offset, buffer_stride);
cl_emit(cl, END_OF_TILE_MARKER, end);
cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
- cl_emit(&cmd_buffer->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
+ cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
branch.start = tile_list_start;
branch.end = v3dv_cl_get_address(cl);
}
}
static void
-emit_copy_layer_to_buffer(struct v3dv_cmd_buffer *cmd_buffer,
+emit_copy_layer_to_buffer(struct v3dv_job *job,
uint32_t min_x_supertile,
uint32_t min_y_supertile,
uint32_t max_x_supertile,
@@ -155,12 +151,12 @@ emit_copy_layer_to_buffer(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t layer,
const VkBufferImageCopy *region)
{
- struct v3dv_cl *rcl = &cmd_buffer->rcl;
+ struct v3dv_cl *rcl = &job->rcl;
const uint32_t tile_alloc_offset =
64 * layer * framebuffer->draw_tiles_x * framebuffer->draw_tiles_y;
cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
- list.address = v3dv_cl_address(cmd_buffer->tile_alloc, tile_alloc_offset);
+ list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
}
cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
@@ -189,8 +185,7 @@ emit_copy_layer_to_buffer(struct v3dv_cmd_buffer *cmd_buffer,
cl_emit(rcl, FLUSH_VCD_CACHE, flush);
- emit_copy_layer_to_buffer_per_tile_list(cmd_buffer, buffer, image,
- layer, region);
+ emit_copy_layer_to_buffer_per_tile_list(job, buffer, image, layer, region);
for (int y = min_y_supertile; y <= max_y_supertile; y++) {
for (int x = min_x_supertile; x <= max_x_supertile; x++) {
@@ -203,7 +198,7 @@ emit_copy_layer_to_buffer(struct v3dv_cmd_buffer *cmd_buffer,
}
static void
-emit_copy_image_to_buffer_rcl(struct v3dv_cmd_buffer *cmd_buffer,
+emit_copy_image_to_buffer_rcl(struct v3dv_job *job,
struct v3dv_buffer *buffer,
struct v3dv_image *image,
struct v3dv_framebuffer *framebuffer,
@@ -212,7 +207,7 @@ emit_copy_image_to_buffer_rcl(struct v3dv_cmd_buffer *cmd_buffer,
{
const VkImageSubresourceLayers *imgrsc = &region->imageSubresource;
- struct v3dv_cl *rcl = &cmd_buffer->rcl;
+ struct v3dv_cl *rcl = &job->rcl;
v3dv_cl_ensure_space_with_branch(rcl, 200 +
imgrsc->layerCount * 256 *
cl_packet_length(SUPERTILE_COORDINATES));
@@ -263,7 +258,7 @@ emit_copy_image_to_buffer_rcl(struct v3dv_cmd_buffer *cmd_buffer,
const uint32_t max_y_supertile = max_render_y / supertile_h_in_pixels;
for (int layer = 0; layer < imgrsc->layerCount; layer++) {
- emit_copy_layer_to_buffer(cmd_buffer,
+ emit_copy_layer_to_buffer(job,
min_x_supertile, min_y_supertile,
max_x_supertile, max_y_supertile,
buffer, image, framebuffer,
@@ -275,17 +270,17 @@ emit_copy_image_to_buffer_rcl(struct v3dv_cmd_buffer *cmd_buffer,
}
static void
-emit_copy_image_to_buffer_bcl(struct v3dv_cmd_buffer *cmd_buffer,
+emit_copy_image_to_buffer_bcl(struct v3dv_job *job,
struct v3dv_framebuffer *framebuffer,
const VkBufferImageCopy *region)
{
- v3dv_cl_ensure_space_with_branch(&cmd_buffer->bcl, 256);
+ v3dv_cl_ensure_space_with_branch(&job->bcl, 256);
- cl_emit(&cmd_buffer->bcl, NUMBER_OF_LAYERS, config) {
+ cl_emit(&job->bcl, NUMBER_OF_LAYERS, config) {
config.number_of_layers = framebuffer->layers;
}
- cl_emit(&cmd_buffer->bcl, TILE_BINNING_MODE_CFG, config) {
+ cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) {
config.width_in_pixels = framebuffer->width;
config.height_in_pixels = framebuffer->height;
config.number_of_render_targets = 1;
@@ -293,20 +288,20 @@ emit_copy_image_to_buffer_bcl(struct v3dv_cmd_buffer *cmd_buffer,
config.maximum_bpp_of_all_render_targets = framebuffer->internal_bpp;
}
- cl_emit(&cmd_buffer->bcl, FLUSH_VCD_CACHE, bin);
+ cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin);
- cl_emit(&cmd_buffer->bcl, OCCLUSION_QUERY_COUNTER, counter);
+ cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter);
- cl_emit(&cmd_buffer->bcl, START_TILE_BINNING, bin);
+ cl_emit(&job->bcl, START_TILE_BINNING, bin);
- cl_emit(&cmd_buffer->bcl, CLIP_WINDOW, clip) {
+ cl_emit(&job->bcl, CLIP_WINDOW, clip) {
clip.clip_window_left_pixel_coordinate = region->imageOffset.x;
clip.clip_window_bottom_pixel_coordinate = region->imageOffset.y;
clip.clip_window_width_in_pixels = region->imageExtent.width;
clip.clip_window_height_in_pixels = region->imageExtent.height;
}
- cl_emit(&cmd_buffer->bcl, FLUSH, flush);
+ cl_emit(&job->bcl, FLUSH, flush);
}
/* Sets framebuffer dimensions and computes tile size parameters based on the
@@ -365,35 +360,30 @@ copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_framebuffer framebuffer;
setup_framebuffer_params(&framebuffer, image, num_layers, internal_bpp);
- /* FIXME: here we assume that we have a valid tile alloc/state setup,
- * which is usually the case for copy after render scenarios. The
- * code below simply checks and asserts this requirement,
- * however, a proper implementation should allocate new tile
- * alloc/state if we don't have one (for example if we haven't
- * recorded a render pass yet) or the one we have isn't large
- * enough. We still need to figure out how we want to handle
- * varying tile alloc/state requirements in a command buffer.
- */
+ struct v3dv_job *job = v3dv_cmd_buffer_start_job(cmd_buffer);
+
uint32_t tile_alloc_size = 64 * num_layers *
framebuffer.draw_tiles_x *
framebuffer.draw_tiles_y;
tile_alloc_size = align(tile_alloc_size, 4096);
tile_alloc_size += 8192;
tile_alloc_size += 512 * 1024;
- assert(cmd_buffer->tile_alloc &&
- cmd_buffer->tile_alloc->size >= tile_alloc_size);
+ job->tile_alloc = v3dv_bo_alloc(cmd_buffer->device, tile_alloc_size);
+ v3dv_job_add_bo(job, job->tile_alloc);
const uint32_t tsda_per_tile_size = 256;
const uint32_t tile_state_size = num_layers *
framebuffer.draw_tiles_x *
framebuffer.draw_tiles_y *
tsda_per_tile_size;
- assert(cmd_buffer->tile_state &&
- cmd_buffer->tile_state->size >= tile_state_size);
+ job->tile_state = v3dv_bo_alloc(cmd_buffer->device, tile_state_size);
+ v3dv_job_add_bo(job, job->tile_state);
- emit_copy_image_to_buffer_bcl(cmd_buffer, &framebuffer, region);
- emit_copy_image_to_buffer_rcl(cmd_buffer, buffer, image,
+ emit_copy_image_to_buffer_bcl(job, &framebuffer, region);
+ emit_copy_image_to_buffer_rcl(job, buffer, image,
&framebuffer, internal_type, region);
+
+ v3dv_cmd_buffer_finish_job(cmd_buffer);
}
void
diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
index 3ef1d14e945..d9c698f1a98 100644
--- a/src/broadcom/vulkan/v3dv_private.h
+++ b/src/broadcom/vulkan/v3dv_private.h
@@ -439,6 +439,30 @@ struct v3dv_dynamic_state {
extern const struct v3dv_dynamic_state default_dynamic_state;
+struct v3dv_job {
+ struct list_head list_link;
+
+ struct v3dv_cmd_buffer *cmd_buffer;
+
+ struct v3dv_cl bcl;
+ struct v3dv_cl rcl;
+ struct v3dv_cl indirect;
+
+ /* Set of all BOs referenced by the job. This will be used for making
+ * the list of BOs that the kernel will need to have paged in to
+ * execute our job.
+ */
+ struct set *bos;
+ uint32_t bo_count;
+
+ struct v3dv_bo *tile_alloc;
+ struct v3dv_bo *tile_state;
+
+ bool tmu_dirty_rcl;
+};
+
+void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);
+
struct v3dv_cmd_buffer_state {
const struct v3dv_render_pass *pass;
const struct v3dv_framebuffer *framebuffer;
@@ -456,8 +480,8 @@ struct v3dv_cmd_buffer_state {
struct v3dv_dynamic_state dynamic;
uint32_t dirty;
- /* FIXME: here? */
- bool tmu_dirty_rcl;
+ /* Current job being recorded */
+ struct v3dv_job *job;
};
struct v3dv_cmd_buffer {
@@ -471,26 +495,16 @@ struct v3dv_cmd_buffer {
VkCommandBufferUsageFlags usage_flags;
VkCommandBufferLevel level;
- struct v3dv_cl bcl;
- struct v3dv_cl rcl;
- struct v3dv_cl indirect;
-
enum v3dv_cmd_buffer_status status;
struct v3dv_cmd_buffer_state state;
- /* Set of all BOs referenced by the job. This will be used for making
- * the list of BOs that the kernel will need to have paged in to
- * execute our job.
- */
- struct set *bos;
- uint32_t bo_count;
-
- struct v3dv_bo *tile_alloc;
- struct v3dv_bo *tile_state;
+ /* List of jobs to submit to the kernel */
+ struct list_head submit_jobs;
};
-void v3dv_cmd_buffer_add_bo(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_bo *bo);
+struct v3dv_job *v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer);
+void v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer);
struct v3dv_shader_module {
unsigned char sha1[20];
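
Taken together, these declarations give a job exactly one owner at any point
in its life. The lifetime implied by the patch:

    v3dv_cmd_buffer_start_job()   ->  cmd_buffer->state.job = job
    v3dv_cmd_buffer_finish_job()  ->  job moved onto cmd_buffer->submit_jobs
    queue_submit()                ->  job_submit() for each list entry
    cmd_buffer_destroy()          ->  job_destroy() for each list entry,
                                      plus state.job if one is still open

job_destroy() calls list_del() on the entry being visited, which is why
cmd_buffer_destroy() must iterate with list_for_each_entry_safe.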
diff --git a/src/broadcom/vulkan/v3dv_queue.c b/src/broadcom/vulkan/v3dv_queue.c
index 678bfb4d813..186c9f07301 100644
--- a/src/broadcom/vulkan/v3dv_queue.c
+++ b/src/broadcom/vulkan/v3dv_queue.c
@@ -29,23 +29,23 @@
#include <errno.h>
static void
-v3dv_clif_dump(struct v3dv_queue *queue,
- struct v3dv_cmd_buffer *cmd_buffer,
+v3dv_clif_dump(struct v3dv_device *device,
+ struct v3dv_job *job,
struct drm_v3d_submit_cl *submit)
{
if (!(V3D_DEBUG & (V3D_DEBUG_CL | V3D_DEBUG_CLIF)))
return;
- struct clif_dump *clif = clif_dump_init(&queue->device->devinfo,
+ struct clif_dump *clif = clif_dump_init(&device->devinfo,
stderr,
V3D_DEBUG & V3D_DEBUG_CL);
- set_foreach(cmd_buffer->bos, entry) {
+ set_foreach(job->bos, entry) {
struct v3dv_bo *bo = (void *)entry->key;
char *name = ralloc_asprintf(NULL, "%s_0x%x",
"" /* bo->name */ , bo->offset);
- v3dv_bo_map(queue->device, bo, bo->size);
+ v3dv_bo_map(device, bo, bo->size);
clif_dump_add_bo(clif, name, bo->offset, bo->size, bo->map);
ralloc_free(name);
@@ -57,17 +57,9 @@ v3dv_clif_dump(struct v3dv_queue *queue,
}
static VkResult
-queue_submit(struct v3dv_queue *queue,
- const VkSubmitInfo *pSubmit,
- VkFence fence)
+job_submit(struct v3dv_job *job)
{
- /* FIXME */
- assert(fence == 0);
- assert(pSubmit->waitSemaphoreCount == 0);
- assert(pSubmit->signalSemaphoreCount == 0);
- assert(pSubmit->commandBufferCount == 1);
-
- V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, pSubmit->pCommandBuffers[0]);
+ assert(job);
struct drm_v3d_submit_cl submit;
@@ -79,36 +71,37 @@ queue_submit(struct v3dv_queue *queue,
/* Update the sync object for the last rendering by our context. */
submit.out_sync = 0; /* FIXME */
- submit.bcl_start = cmd_buffer->bcl.bo->offset;
- submit.bcl_end = cmd_buffer->bcl.bo->offset + v3dv_cl_offset(&cmd_buffer->bcl);
- submit.rcl_start = cmd_buffer->rcl.bo->offset;
- submit.rcl_end = cmd_buffer->rcl.bo->offset + v3dv_cl_offset(&cmd_buffer->rcl);
+ submit.bcl_start = job->bcl.bo->offset;
+ submit.bcl_end = job->bcl.bo->offset + v3dv_cl_offset(&job->bcl);
+ submit.rcl_start = job->rcl.bo->offset;
+ submit.rcl_end = job->rcl.bo->offset + v3dv_cl_offset(&job->rcl);
submit.flags = 0;
/* FIXME: we already know that we support cache flush, as we only support
* hw that supports it, but it would be better to query the DRM device for it
*/
- if (cmd_buffer->state.tmu_dirty_rcl)
+ if (job->tmu_dirty_rcl)
submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE;
- submit.qma = cmd_buffer->tile_alloc->offset;
- submit.qms = cmd_buffer->tile_alloc->size;
- submit.qts = cmd_buffer->tile_state->offset;
+ submit.qma = job->tile_alloc->offset;
+ submit.qms = job->tile_alloc->size;
+ submit.qts = job->tile_state->offset;
- submit.bo_handle_count = cmd_buffer->bo_count;
+ submit.bo_handle_count = job->bo_count;
uint32_t *bo_handles =
(uint32_t *) malloc(sizeof(uint32_t) * MAX2(4, submit.bo_handle_count * 2));
uint32_t bo_idx = 0;
- set_foreach(cmd_buffer->bos, entry) {
+ set_foreach(job->bos, entry) {
struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
bo_handles[bo_idx++] = bo->handle;
}
assert(bo_idx == submit.bo_handle_count);
submit.bo_handles = (uintptr_t)(void *)bo_handles;
- v3dv_clif_dump(queue, cmd_buffer, &submit);
+ struct v3dv_device *device = job->cmd_buffer->device;
+ v3dv_clif_dump(device, job, &submit);
- int ret = v3dv_ioctl(queue->device->fd, DRM_IOCTL_V3D_SUBMIT_CL, &submit);
+ int ret = v3dv_ioctl(device->fd, DRM_IOCTL_V3D_SUBMIT_CL, &submit);
static bool warned = false;
if (ret && !warned) {
fprintf(stderr, "Draw call returned %s. Expect corruption.\n",
@@ -124,6 +117,29 @@ queue_submit(struct v3dv_queue *queue,
return VK_SUCCESS;
}
+static VkResult
+queue_submit(struct v3dv_queue *queue,
+ const VkSubmitInfo *pSubmit,
+ VkFence fence)
+{
+ /* FIXME */
+ assert(fence == 0);
+ assert(pSubmit->waitSemaphoreCount == 0);
+ assert(pSubmit->signalSemaphoreCount == 0);
+ assert(pSubmit->commandBufferCount == 1);
+
+ V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, pSubmit->pCommandBuffers[0]);
+
+ list_for_each_entry_safe(struct v3dv_job, job,
+ &cmd_buffer->submit_jobs, list_link) {
+ VkResult result = job_submit(job);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+
+ return VK_SUCCESS;
+}
+
VkResult
v3dv_QueueSubmit(VkQueue _queue,
uint32_t submitCount,
diff --git a/src/broadcom/vulkan/v3dv_uniforms.c b/src/broadcom/vulkan/v3dv_uniforms.c
index 109dbe47158..0652753fb9b 100644
--- a/src/broadcom/vulkan/v3dv_uniforms.c
+++ b/src/broadcom/vulkan/v3dv_uniforms.c
@@ -34,6 +34,9 @@ v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
struct v3d_uniform_list *uinfo = &p_stage->prog_data.base->uniforms;
struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+
/* The hardware always pre-fetches the next uniform (also when there
* aren't any), so we always allocate space for an extra slot. This
* fixes MMU exceptions reported since Linux kernel 5.4 when the
@@ -42,13 +45,11 @@ v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
* the last uniform it will read beyond the end of the page and trigger
* the MMU exception.
*/
- v3dv_cl_ensure_space(&cmd_buffer->indirect, (uinfo->count + 1) * 4, 4);
+ v3dv_cl_ensure_space(&job->indirect, (uinfo->count + 1) * 4, 4);
- struct v3dv_cl_reloc uniform_stream =
- v3dv_cl_get_address(&cmd_buffer->indirect);
+ struct v3dv_cl_reloc uniform_stream = v3dv_cl_get_address(&job->indirect);
- struct v3dv_cl_out *uniforms =
- cl_start(&cmd_buffer->indirect);
+ struct v3dv_cl_out *uniforms = cl_start(&job->indirect);
for (int i = 0; i < uinfo->count; i++) {
uint32_t data = uinfo->data[i];
@@ -79,7 +80,7 @@ v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
}
}
- cl_end(&cmd_buffer->indirect, uniforms);
+ cl_end(&job->indirect, uniforms);
return uniform_stream;
}
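
The +1 in the space calculation is load-bearing: the hardware prefetches one
uniform past the last one it consumes, so the stream must reserve count + 1
four-byte slots to keep that prefetch inside the mapped BO. The write pattern
used above, as a minimal sketch:

    /* Reserve an extra 4-byte slot for the HW's trailing uniform prefetch. */
    v3dv_cl_ensure_space(&job->indirect, (uinfo->count + 1) * 4, 4);
    struct v3dv_cl_reloc stream = v3dv_cl_get_address(&job->indirect);
    struct v3dv_cl_out *uniforms = cl_start(&job->indirect);
    /* ... write uinfo->count 32-bit uniform words through 'uniforms' ... */
    cl_end(&job->indirect, uniforms);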