summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Timur Kristóf <timur.kristof@gmail.com> 2022-05-22 10:43:42 +0200
committer Dylan Baker <dylan.c.baker@intel.com> 2022-05-26 09:10:37 -0700
commit c9d0d37f92114a1a9d7bd65752e3203b8e5fd1c0 (patch)
tree c9a916fb7972d268ba4a7d0ef100613e2e8cfa49
parent 324bcca2281e556ea0eba8ee439382f1e16b2f38 (diff)
radv: Implement conditional rendering for async compute queue.
MEC (the compute queue firmware) does not support real predication, so we have to emulate that using COND_EXEC packets before each dispatch.

Additionally, COND_EXEC doesn't have an inverted mode, so in order to support inverted mode conditional rendering, we allocate a new piece of memory in which we invert the condition.

Cc: mesa-stable
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6533
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16653>
(cherry picked from commit 85a4c5b35131ab4279101821c060bdd2ce58c3ea)
-rw-r--r-- .pick_status.json 2
-rw-r--r-- src/amd/vulkan/radv_cmd_buffer.c 95
2 files changed, 88 insertions, 9 deletions
diff --git a/.pick_status.json b/.pick_status.json
index db382c0ab4a..c35fc2f1025 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -348,7 +348,7 @@
"description": "radv: Implement conditional rendering for async compute queue.",
"nominated": true,
"nomination_type": 0,
- "resolution": 0,
+ "resolution": 1,
"because_sha": null
},
{
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index c193555fb64..146cd249146 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -6034,6 +6034,26 @@ radv_emit_view_index(struct radv_cmd_buffer *cmd_buffer, unsigned index)
}
}
+/**
+ * Emulates predication for MEC using COND_EXEC; does nothing unless state.predicating is set.
+ * When the current command buffer is predicating, emit a 5-dword COND_EXEC packet on
+ * state.predication_va so that the MEC skips the next `dwords` dwords worth of packets.
+ */
+static void
+radv_cs_emit_compute_predication(struct radv_cmd_buffer *cmd_buffer, unsigned dwords)
+{
+ if (!cmd_buffer->state.predicating)
+ return; /* Not predicating: emit nothing. */
+
+ struct radeon_cmdbuf *cs = cmd_buffer->cs; /* Note: no radeon_check_space() call in this helper. */
+
+ radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0)); /* Packet header; 4 payload dwords follow. */
+ radeon_emit(cs, cmd_buffer->state.predication_va); /* Predication VA, low bits. */
+ radeon_emit(cs, cmd_buffer->state.predication_va >> 32); /* Predication VA, high bits. */
+ radeon_emit(cs, 0); /* Cache policy */
+ radeon_emit(cs, dwords); /* Size of the predicated packet(s) in DWORDs. */
+}
+
static void
radv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t vertex_count,
uint32_t use_opaque)
@@ -7195,7 +7215,7 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, struct radv_pipel
loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
- ASSERTED unsigned cdw_max = radeon_check_space(ws, cs, 25);
+ ASSERTED unsigned cdw_max = radeon_check_space(ws, cs, 30);
if (compute_shader->info.wave_size == 32) {
assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10);
@@ -7218,7 +7238,8 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, struct radv_pipel
}
if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
- radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, predicating) | PKT3_SHADER_TYPE_S(1));
+ radv_cs_emit_compute_predication(cmd_buffer, 4 /* DISPATCH_INDIRECT size */);
+ radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, 0) | PKT3_SHADER_TYPE_S(1));
radeon_emit(cs, info->va);
radeon_emit(cs, info->va >> 32);
radeon_emit(cs, dispatch_initiator);
@@ -7289,6 +7310,11 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, struct radv_pipel
dispatch_initiator |= S_00B800_FORCE_START_AT_000(1);
}
+ if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
+ radv_cs_emit_compute_predication(cmd_buffer, 5 /* DISPATCH_DIRECT size */);
+ predicating = false;
+ }
+
radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, predicating) | PKT3_SHADER_TYPE_S(1));
radeon_emit(cs, blocks[0]);
radeon_emit(cs, blocks[1]);
@@ -8624,11 +8650,59 @@ radv_CmdBeginConditionalRenderingEXT(
pred_op = PREDICATION_OP_BOOL64;
}
- /* Enable predication for this command buffer. */
- si_emit_set_predication_state(cmd_buffer, draw_visible, pred_op, va);
- cmd_buffer->state.predicating = true;
+
+ if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
+ /* MEC does not support real predication, so we have to emulate that
+ * using COND_EXEC packets before each dispatch.
+ */
+
+ if (!draw_visible) {
+ /* COND_EXEC can only skip the next packet(s) when the condition is 0.
+ * When using inverted conditional rendering, we allocate some space in
+ * the upload BO and emit some packets to invert the condition.
+ */
+
+ uint64_t pred_value = 0, pred_va;
+ unsigned pred_offset;
+
+ radv_cmd_buffer_upload_data(cmd_buffer, 4, &pred_value, &pred_offset);
+ pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;
+
+ /* Write 1 to the new predication VA. */
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
+ COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, 1);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, pred_va);
+ radeon_emit(cs, pred_va >> 32);
+
+ /* If the API predication VA == 0, skip next command. */
+ radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 6); /* 1x COPY_DATA size */
+
+ /* Write 0 to the new predication VA (when the API condition != 0) */
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
+ COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, pred_va);
+ radeon_emit(cs, pred_va >> 32);
+
+ va = pred_va;
+ draw_visible = true;
+ }
+ } else {
+ /* Enable predication for this command buffer. */
+ si_emit_set_predication_state(cmd_buffer, draw_visible, pred_op, va);
+ }
/* Store conditional rendering user info. */
+ cmd_buffer->state.predicating = true;
cmd_buffer->state.predication_type = draw_visible;
cmd_buffer->state.predication_op = pred_op;
cmd_buffer->state.predication_va = va;
@@ -8639,11 +8713,16 @@ radv_CmdEndConditionalRenderingEXT(VkCommandBuffer commandBuffer)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- /* Disable predication for this command buffer. */
- si_emit_set_predication_state(cmd_buffer, false, 0, 0);
- cmd_buffer->state.predicating = false;
+ /* Note, MEC doesn't support predication, so we
+ * don't have to emit anything for MEC here.
+ */
+ if (!radv_cmd_buffer_uses_mec(cmd_buffer)) {
+ /* Disable predication for this command buffer. */
+ si_emit_set_predication_state(cmd_buffer, false, 0, 0);
+ }
/* Reset conditional rendering user info. */
+ cmd_buffer->state.predicating = false;
cmd_buffer->state.predication_type = -1;
cmd_buffer->state.predication_op = 0;
cmd_buffer->state.predication_va = 0;