diff options
author | Danylo Piliaiev <dpiliaiev@igalia.com> | 2021-01-12 17:03:11 +0200 |
---|---|---|
committer | Marge Bot <eric+marge@anholt.net> | 2021-01-13 09:55:47 +0000 |
commit | 5331b1d9456e674751ffe0d68c08e0c6d3ea0d17 (patch) | |
tree | 5ecd387c06880a5ddf60dfca2ec11dc1e797f66a | |
parent | a6ae7b242160ab867b1eda3c3fb4f75020129338 (diff) |
turnip: implement indirect dispatch
Vulkan guarantees only 4-byte alignment of the offset for vkCmdDispatchIndirect,
while CP_LOAD_STATE.EXT_SRC_ADDR requires 16-byte alignment, so when the offset
is not 16-byte aligned we copy the indirect parameters to a correctly aligned buffer.
The blob does essentially the same, but emits an indirect CP_LOAD_STATE
with src = SS6_UBO and EXT_SRC_ADDR = 0xe0000, and only for the
first dispatch.
Fixes:
dEQP-VK.compute.indirect_dispatch.*
Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8444>
-rw-r--r-- | .gitlab-ci/deqp-freedreno-a630-fails.txt | 2 | ||||
-rw-r--r-- | src/freedreno/vulkan/tu_cmd_buffer.c | 37 | ||||
-rw-r--r-- | src/freedreno/vulkan/tu_private.h | 2 |
3 files changed, 36 insertions, 5 deletions
diff --git a/.gitlab-ci/deqp-freedreno-a630-fails.txt b/.gitlab-ci/deqp-freedreno-a630-fails.txt index cadefbf5d74..3ead6bc1220 100644 --- a/.gitlab-ci/deqp-freedreno-a630-fails.txt +++ b/.gitlab-ci/deqp-freedreno-a630-fails.txt @@ -12,8 +12,6 @@ KHR-GL33.transform_feedback.query_vertex_interleaved_test,Fail KHR-GL33.transform_feedback.query_vertex_separate_test,Fail KHR-GL33.cull_distance.coverage,Fail dEQP-VK.api.object_management.single_alloc_callbacks.compute_pipeline,Fail -dEQP-VK.compute.indirect_dispatch.gen_in_compute.multiple_groups_multiple_invocations,Fail -dEQP-VK.compute.indirect_dispatch.upload_buffer.multiple_groups,Fail dEQP-VK.draw.shader_viewport_index.fragment_shader_12,Fail dEQP-VK.draw.shader_viewport_index.fragment_shader_2,Fail dEQP-VK.draw.shader_viewport_index.vertex_shader_9,Fail diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index b89248dae7f..8e07f82e0b9 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -3786,7 +3786,8 @@ struct tu_dispatch_info }; static void -tu_emit_compute_driver_params(struct tu_cs *cs, struct tu_pipeline *pipeline, +tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, struct tu_pipeline *pipeline, const struct tu_dispatch_info *info) { gl_shader_stage type = MESA_SHADER_COMPUTE; @@ -3821,8 +3822,38 @@ tu_emit_compute_driver_params(struct tu_cs *cs, struct tu_pipeline *pipeline, uint32_t i; for (i = 0; i < num_consts; i++) tu_cs_emit(cs, driver_params[i]); + } else if (!(info->indirect_offset & 0xf)) { + tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3); + tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(offset) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) | + CP_LOAD_STATE6_0_NUM_UNIT(1)); + tu_cs_emit_qw(cs, tu_buffer_iova(info->indirect) + info->indirect_offset); } else { - tu_finishme("Indirect driver 
params"); + /* Vulkan guarantees only 4 byte alignment for indirect_offset. + * However, CP_LOAD_STATE.EXT_SRC_ADDR needs 16 byte alignment. + */ + + uint64_t indirect_iova = tu_buffer_iova(info->indirect) + info->indirect_offset; + + for (uint32_t i = 0; i < 3; i++) { + tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 5); + tu_cs_emit(cs, 0); + tu_cs_emit_qw(cs, global_iova(cmd, cs_indirect_xyz[i])); + tu_cs_emit_qw(cs, indirect_iova + i * 4); + } + + tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0); + tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE); + + tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3); + tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(offset) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) | + CP_LOAD_STATE6_0_NUM_UNIT(1)); + tu_cs_emit_qw(cs, global_iova(cmd, cs_indirect_xyz[0])); } } @@ -3844,7 +3875,7 @@ tu_dispatch(struct tu_cmd_buffer *cmd, tu_cs_emit_state_ib(cs, tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_COMPUTE)); - tu_emit_compute_driver_params(cs, pipeline, info); + tu_emit_compute_driver_params(cmd, cs, pipeline, info); if (cmd->state.dirty & TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD) tu_cs_emit_state_ib(cs, pipeline->load_state); diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index 698253bb239..053a68beb87 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -339,6 +339,8 @@ struct tu6_global uint32_t pad[7]; } flush_base[4]; + ALIGN16 uint32_t cs_indirect_xyz[3]; + /* note: larger global bo will be used for customBorderColors */ struct bcolor_entry bcolor_builtin[TU_BORDER_COLOR_BUILTIN], bcolor[]; }; |